library(stringr)
library(data.table)
library(lubridate)
library(tictoc)
library(dplyr)
library(dtplyr)
library(zoo)
library(openxlsx)
library(taRifx)
library(haven)
library(sandwich)
library(lmtest)
library(sqldf)
library(ggplot2)
library(stringr)
library(data.table)
library(lubridate)
library(tictoc)
library(dplyr)
library(dtplyr)
library(zoo)
library(stats)

rm(list=ls())
options(max.print=999999)



# Sets the current path
current_path <- "XXX"
setwd(current_path)


#setwd("~/Corporate_Bond_TRACE/data/TRACE_standard")

#"Additional_bond_attributes.csv" is bond domicile ("COUNTRY_DOMICILE") information from Mergent FISD-Bond Issue data
Bond_domicile<-fread("Additional_bond_attributes.csv")


for (E in c("Eligible","Ineligible")){

  TradeData_filter_masterfundamental<-
    fread("Data_FilterMerge_standard_TRACE.csv")

  print(nrow(TradeData_filter_masterfundamental))
  
  #################################################################################################
  ###Part-1: process data
  
  #################################################################################################
  # distribution of price and quantity #
  #################################################################################################
  print(summary(TradeData_filter_masterfundamental$rptd_pr))
  print(summary(TradeData_filter_masterfundamental$`Quantity(ParVal)_num`))
  l01pt_price<-quantile(as.matrix(TradeData_filter_masterfundamental$rptd_pr),probs = 0.001,na.rm = TRUE)
  h01pt_price<-quantile(as.matrix(TradeData_filter_masterfundamental$rptd_pr),probs = 0.999,na.rm = TRUE)
  l01pt_vol<-quantile(as.matrix(TradeData_filter_masterfundamental$`Quantity(ParVal)_num`),probs = 0.001,na.rm = TRUE)
  h01pt_vol<-quantile(as.matrix(TradeData_filter_masterfundamental$`Quantity(ParVal)_num`),probs = 0.999,na.rm = TRUE)
  
  TradeData_filter_masterfundamental[rptd_pr<l01pt_price,`:=`(rptd_pr=l01pt_price)]
  TradeData_filter_masterfundamental[rptd_pr>h01pt_price,`:=`(rptd_pr=h01pt_price)]
  TradeData_filter_masterfundamental[`Quantity(ParVal)_num`<l01pt_vol,`:=`(`Quantity(ParVal)_num`=l01pt_vol)]
  
  #################################################################################################
  # dummies for interdealer, customer, dealer-sell-to-customer and customer-sell-to-dealer trades #
  #################################################################################################
  TradeData_filter_masterfundamental[,`:=`(dum_interdealer=0, dum_customer=0, dum_dealer_to_customer=0, dum_customer_to_dealer=0)]
  TradeData_filter_masterfundamental[trade_type=="D2D", dum_interdealer:=1]
  TradeData_filter_masterfundamental[trade_type=="C2D" | trade_type=="D2C" , dum_customer:=1]
  TradeData_filter_masterfundamental[trade_type=="D2C", 
                                     dum_dealer_to_customer:=1]
  TradeData_filter_masterfundamental[trade_type=="C2D", 
                                     dum_customer_to_dealer:=1]
  
  #################################################################################################
  # execution time #
  #################################################################################################
  # TradeData_filter_masterfundamental[,datetime_exctn := 
  #                    paste0(trd_exctn_dt,"T",str_pad(`Execution Time`,width=6,side="left",pad="0"),
  #                           "Z")]
  TradeData_filter_masterfundamental[,datetime_exctn := ymd_hms(datetime_exctn)]

  TradeData_filter_masterfundamental[,`:=`(datetime_exctn=as_datetime(datetime_exctn))]
  TradeData_filter_masterfundamental[,`:=`(new_datetime_exctn=as_datetime(datetime_exctn))]
  
  merge_index<-c("cusip_id","bond_sym_id","company_symbol")
  setcolorder(TradeData_filter_masterfundamental,
              c(merge_index,"trd_exctn_dt_s",
                "datetime_exctn","new_datetime_exctn"))

  #################################################################################################
  # time_to_maturity with 5 years cutoff #
  #################################################################################################
  TradeData_filter_masterfundamental<-
    TradeData_filter_masterfundamental[time_to_maturity>=-10]
  print(nrow(TradeData_filter_masterfundamental))
  TradeData_filter_masterfundamental[,`:=`(dum_TTM5yless=0,
                                           dum_TTM5ymore=0)]
  TradeData_filter_masterfundamental[time_to_maturity<=1825,
                                     `:=`(dum_TTM5yless=1)]
  TradeData_filter_masterfundamental[time_to_maturity>1825,
                                     `:=`(dum_TTM5ymore=1)]
  
  #################################################################################################
  # bond grade #
  #################################################################################################
  
  ###$$$###
  TradeData_filter_masterfundamental[,`:=`(bond_grade=mean_grade)]
  #TradeData_filter_masterfundamental[,`:=`(bond_grade=new_bond_grade4)]
  #TradeData_filter_masterfundamental[,`:=`(bond_grade=new_bond_grade3)]
  #TradeData_filter_masterfundamental[,`:=`(bond_grade=new_bond_grade2)]
  #TradeData_filter_masterfundamental[,`:=`(bond_grade=new_bond_grade)]
  #TradeData_filter_masterfundamental[,`:=`(bond_grade=mean_grade)]
  print(table(TradeData_filter_masterfundamental$bond_grade))
  print(message(paste0("number of IG bonds is ",nrow(unique(as.matrix(TradeData_filter_masterfundamental[bond_grade=="I",c("cusip_id"),with=FALSE]))))))
  print(message(paste0("number of HY bonds is ",nrow(unique(as.matrix(TradeData_filter_masterfundamental[bond_grade=="H",c("cusip_id"),with=FALSE]))))))
  print(message(paste0("number of NR bonds is ",nrow(unique(as.matrix(TradeData_filter_masterfundamental[bond_grade=="NR",c("cusip_id"),with=FALSE]))))))
  
  #################################################################################################
  # industry and US/nonUS #
  #################################################################################################
  # TradeData_filter_masterfundamental<-
  #   left_join(TradeData_filter_masterfundamental,Bond_domicile_new,by=c("issuer_cusip"))
  
  TradeData_filter_masterfundamental<-as.data.table(TradeData_filter_masterfundamental)
  
  TradeData_filter_masterfundamental[,`:=`(INDUSTRY="",
                                           US_indicator="")]
  
  TradeData_filter_masterfundamental[industry_code>=20&industry_code<=26,
                                     `:=`(INDUSTRY="Finance")]
  TradeData_filter_masterfundamental[industry_code==12,
                                     `:=`(INDUSTRY="Oil&Gas")]
  TradeData_filter_masterfundamental[industry_code==16,
                                     `:=`(INDUSTRY="Transportation")]
  TradeData_filter_masterfundamental[industry_code==30|industry_code==31|industry_code==33,
                                     `:=`(INDUSTRY="Utility")]
  
  print(table(TradeData_filter_masterfundamental$INDUSTRY))
  
  print(table(TradeData_filter_masterfundamental$country_domicile))
  
  Bond_domicile[,`:=`(count_seq=sequence(.N)),by=c("issuer_cusip")]
  Bond_domicile<-Bond_domicile[count_seq==1]
  Bond_domicile_new<-Bond_domicile[,c("issuer_cusip","country_domicile","Industry"),with=FALSE]
  setnames(TradeData_filter_masterfundamental,c("issuer_cusip.x"),c("issuer_cusip"))
  TradeData_filter_masterfundamental<-left_join(TradeData_filter_masterfundamental,Bond_domicile_new,by=c("issuer_cusip"))
  
  TradeData_filter_masterfundamental<-as.data.table(TradeData_filter_masterfundamental)
  print(message(paste0("prop of missing country_domicile is ",nrow(TradeData_filter_masterfundamental[is.na(country_domicile)])/
                         nrow(TradeData_filter_masterfundamental))))
  
  TradeData_filter_masterfundamental[country_domicile=="USA",
                                     `:=`(US_indicator="US")]
  TradeData_filter_masterfundamental[country_domicile!="USA"&(!is.na(country_domicile)),
                                     `:=`(US_indicator="non-US")]
  
  print(table(TradeData_filter_masterfundamental$INDUSTRY))
  print(table(TradeData_filter_masterfundamental$Industry))
  print(table(TradeData_filter_masterfundamental$US_indicator))
  
  TradeData_filter_masterfundamental[,`:=`(dum_US=0,
                                           dum_nonUS=0,
                                           dum_Finance=0,
                                           dum_OilGas=0,dum_Transportation=0,dum_Utility=0)]
  
  TradeData_filter_masterfundamental[INDUSTRY=="Finance",`:=`(dum_Finance=1)]
  TradeData_filter_masterfundamental[INDUSTRY=="Oil&Gas",`:=`(dum_OilGas=1)]
  TradeData_filter_masterfundamental[INDUSTRY=="Transportation",`:=`(dum_Transportation=1)]
  TradeData_filter_masterfundamental[INDUSTRY=="Utility",`:=`(dum_Utility=1)]
  TradeData_filter_masterfundamental[US_indicator=="US",`:=`(dum_US=1)]
  TradeData_filter_masterfundamental[US_indicator=="non-US",`:=`(dum_nonUS=1)]
  
  ##############################################################
  ###exclude primary market transactions
  print(summary(TradeData_filter_masterfundamental$time_to_offering))
  print(nrow(TradeData_filter_masterfundamental))
  ###we do not keep missing time_to_offering
  TradeData_filter_masterfundamental<-
    TradeData_filter_masterfundamental[time_to_offering>=90]
  print(summary(TradeData_filter_masterfundamental$time_to_offering))
  print(nrow(TradeData_filter_masterfundamental))
  
  ##############################################################
  #For dum_US==1&bond_grade==I&dum_TTM5ymore==1&trd_exctn_dt_s<="2020-03-23", we need to calculate how many days they are 
  #to March 23, 2020. 
  TradeData_filter_masterfundamental[
    ,
    `:=`(days_to_March23=as.IDate(trd_exctn_dt_s)-as.IDate("2020-03-23"))
  ]
  print(summary(TradeData_filter_masterfundamental$days_to_March23))
  #we need dum_US==1, bond_grade==I, dum_TTM5yless==1 & (dum_TTM5ymore==1&trd_exctn_dt_s<="2020-03-23"&time_to_maturity<=5*365-days_to_March23)
  EligibleBondsList_TradeData_filter_masterfundamental_standard<-
    as.data.table(unique(TradeData_filter_masterfundamental[
      ((dum_US==1)&
         (bond_grade=="I")&
         (dum_TTM5yless==1))|
        ((dum_US==1)&
           (bond_grade=="I")&
           dum_TTM5ymore==1&trd_exctn_dt_s<="2020-03-23"&time_to_maturity<=5*365-days_to_March23),
      c("cusip_id"),
      with=FALSE]))
  #proportion of transactions of eligible bonds within all transactions through Jan-Mar, 2020: 35.12%
  print(nrow(TradeData_filter_masterfundamental[
    ((dum_US==1)&
       (bond_grade=="I")&
       (dum_TTM5yless==1))|
      ((dum_US==1)&
         (bond_grade=="I")&
         dum_TTM5ymore==1&trd_exctn_dt_s<="2020-03-23"&time_to_maturity<=5*365-days_to_March23)])/
      nrow(TradeData_filter_masterfundamental))
  #proportion of eligible bonds within all bonds through Jan-Mar, 2020: 27.53%
  print(nrow(unique(TradeData_filter_masterfundamental[
    ((dum_US==1)&
       (bond_grade=="I")&
       (dum_TTM5yless==1))|
      ((dum_US==1)&
         (bond_grade=="I")&
         dum_TTM5ymore==1&trd_exctn_dt_s<="2020-03-23"&time_to_maturity<=5*365-days_to_March23),
    c("cusip_id"),
    with=FALSE
  ]))/
    nrow(unique(TradeData_filter_masterfundamental[,c("cusip_id"),with=FALSE])))
  
  ###save the list of eligible bonds.
  fwrite(EligibleBondsList_TradeData_filter_masterfundamental_standard,
         "EligibleBondsList_TradeData_filter_masterfundamental_standard.csv")
  ##############################################################
  if (E=="Eligible"){
  TradeData_filter_masterfundamental<-
    TradeData_filter_masterfundamental[
      ((dum_US==1)&
         (bond_grade=="I")&
         (dum_TTM5yless==1))|
        ((dum_US==1)&
           (bond_grade=="I")&
           dum_TTM5ymore==1&trd_exctn_dt_s<="2020-03-23"&time_to_maturity<=5*365-days_to_March23)]
  }else{
    TradeData_filter_masterfundamental<-
      TradeData_filter_masterfundamental[!(cusip_id %in% c(EligibleBondsList_TradeData_filter_masterfundamental_standard$cusip_id))]
  }
  
  ##############################################################
  ExtraDum<-c("dum_US","dum_nonUS","dum_Finance","dum_OilGas","dum_Transportation","dum_Utility",
              "dum_TTM5ymore","dum_TTM5yless")
  
  setnames(TradeData_filter_masterfundamental,
           c("Quantity(ParVal)","Quantity(ParVal)_num"),
           c("Quantity","Quantity_num"))
  
  #firstly look at all C2D, D2D and D2C, then specifically look at REMAINING C2D_RC and D2C_RC
  TradeData_filter_masterfundamental[,ID:=.I]
  
  TMP_C2D <- TradeData_filter_masterfundamental[(dum_customer_to_dealer==1),
                                                c("cusip_id","trd_exctn_dt","new_datetime_exctn","rptd_pr","Quantity_num","bond_grade","Quantity",
                                                  "trade_type","remuneration","ID",ExtraDum),
                                                with=FALSE]
  
  TMP_D2D <- TradeData_filter_masterfundamental[dum_interdealer==1,c("cusip_id","trd_exctn_dt","new_datetime_exctn","rptd_pr","Quantity_num","bond_grade","Quantity",
                                                                     "trade_type","remuneration","ID",ExtraDum),
                                                with=FALSE]
  
  TMP_D2C <- TradeData_filter_masterfundamental[(dum_dealer_to_customer==1),c("cusip_id","trd_exctn_dt","new_datetime_exctn","rptd_pr","Quantity_num","bond_grade","Quantity",
                                                                              "trade_type","remuneration","ID",ExtraDum),
                                                with=FALSE]
  
  setnames(TMP_C2D,c("ID"),c("C2D_ID"))
  setnames(TMP_D2D,c("ID"),c("D2D_ID"))
  setnames(TMP_D2C,c("ID"),c("D2C_ID"))
  
  print(nrow(TMP_C2D)+nrow(TMP_D2D)+nrow(TMP_D2C))
  print(nrow(TradeData_filter_masterfundamental))
 
  setnames(TMP_D2C,
           c("cusip_id","trd_exctn_dt","new_datetime_exctn","rptd_pr","Quantity_num","bond_grade","Quantity","trade_type","remuneration",
             ExtraDum),
           paste0(c("cusip_id","trd_exctn_dt","new_datetime_exctn","rptd_pr","Quantity_num","bond_grade","Quantity","trade_type","remuneration",
                    ExtraDum),"_D2C"))
  
  ###change matching standard to be within 15 min. 
  #round-1:TMP_C2D and TMP_D2C
  ###CHECKED timediff is in seconds
  TMP_Agency_1 <- data.table(sqldf('select *,
                                 b.new_datetime_exctn_D2C-a.new_datetime_exctn as timediff
                                 from TMP_C2D as a, TMP_D2C as b
                                 where a.cusip_id=b.cusip_id_D2C and a.trd_exctn_dt=b.trd_exctn_dt_D2C and a.Quantity=b.Quantity_D2C and timediff<=900 and timediff>=0
                                 order by C2D_ID, timediff'))
  ##############################################################
  ###update01/03/2021: relax equal quantity restriction, to allow absolute difference not bigger than 0.1
  # TMP_Agency_1 <- data.table(sqldf('select *,
  #                                b.new_datetime_exctn_D2C-a.new_datetime_exctn as timediff,
  #                                abs(a.Quantity_num-b.Quantity_num_D2C) as quantityabsdiff
  #                                from TMP_C2D as a, TMP_D2C as b
  #                                where a.cusip_id=b.cusip_id_D2C and a.trd_exctn_dt=b.trd_exctn_dt_D2C and quantityabsdiff<0.1 and timediff<=900 and timediff>=0
  #                                order by C2D_ID, timediff'))
  ##############################################################
  
  nrow(TMP_Agency_1)
  TMP_Agency_1[,`:=`(count_seq=sequence(.N)),
               by=c("C2D_ID")]
  TMP_Agency_1<-TMP_Agency_1[count_seq==1]
  nrow(TMP_Agency_1)
  
  # ###the matching is same as below. 
  # fuzzy_mtch_agency<-function(C2D, D2C){
  #   # dt1<-lazy_dt(C2D)
  #   # dt2<-lazy_dt(D2C)
  #   dt1<-as.data.table(C2D)
  #   dt2<-as.data.table(D2C)
  #   Agency_Trades<-dt1%>%
  #     left_join(dt2, by=c("cusip_id" = "cusip_id_D2C", 
  #                         "trd_exctn_dt" = "trd_exctn_dt_D2C"))%>%
  #     mutate(quantdiff= abs(Quantity_num_D2C - Quantity_num),
  #            timediff = new_datetime_exctn_D2C - new_datetime_exctn)%>%
  #     filter(quantdiff<0.1, timediff<=900, timediff>=0)%>%
  #     as.data.table()
  #   Agency_Trades=unique(Agency_Trades, by=c("C2D_ID"))
  #   return(Agency_Trades)
  # }
  # 
  # Reg_Agency_self_gen<-fuzzy_mtch_agency(TMP_C2D, TMP_D2C)
  
  match_TMP_C2D<-TMP_C2D[C2D_ID %in% as.vector(TMP_Agency_1$C2D_ID)]
  unmatch_TMP_C2D<-TMP_C2D[!(C2D_ID %in% as.vector(TMP_Agency_1$C2D_ID))]
  match_TMP_D2C<-TMP_D2C[D2C_ID %in% as.vector(TMP_Agency_1$D2C_ID)]
  unmatch_TMP_D2C<-TMP_D2C[!(D2C_ID %in% as.vector(TMP_Agency_1$D2C_ID))]
  
  ###calculate daily_irc_byvol
  TMP_Agency_1[,`:=`(max_prc_pairwise=pmax(rptd_pr, rptd_pr_D2C, na.rm = TRUE),
                     min_prc_pairwise=pmin(rptd_pr, rptd_pr_D2C, na.rm = TRUE))]
  
  TMP_Agency_1[,daily_irc_byvol:=(max_prc_pairwise-min_prc_pairwise)/max_prc_pairwise,
               by=.(trd_exctn_dt,cusip_id,`Quantity_num`)]
  
  # winsorized at the 1% level each day
  TMP_Agency_1[,
               `:=`(high1pt_daily_irc_byvol=quantile(daily_irc_byvol,probs = 0.99,na.rm = TRUE),
                    low1pt_daily_irc_byvol=quantile(daily_irc_byvol,probs = 0.01,na.rm = TRUE)),
               by=c("trd_exctn_dt")]
  
  TMP_Agency_1[daily_irc_byvol>high1pt_daily_irc_byvol,
               `:=`(daily_irc_byvol=high1pt_daily_irc_byvol)]
  TMP_Agency_1[daily_irc_byvol<low1pt_daily_irc_byvol,
               `:=`(daily_irc_byvol=low1pt_daily_irc_byvol)]
  
  low1pt_allsample_Quantity_num<-quantile(TMP_Agency_1$`Quantity_num`,0.01,na.rm = TRUE)
  
  TMP_Agency_1[`Quantity_num`<low1pt_allsample_Quantity_num,
               `:=`(`Quantity_num`=low1pt_allsample_Quantity_num)]
  
  print(summary(TMP_Agency_1$`Quantity_num`))
  
  fwrite(TMP_Agency_1,paste0("TMP_Agency_1_calculate_new_IRC_original.csv"))
  
  tmp_irc_cusip_date_vol<-TMP_Agency_1[,
                                       c("cusip_id","trd_exctn_dt","daily_irc_byvol","Quantity_num",
                                         "bond_grade",ExtraDum),
                                       with=FALSE]
  
  ###the bond_daily_vol used as weights only includes matched C2D transactions, which makes sense. 
  tmp_irc_cusip_date_vol[,`:=`(wave_daily_irc_byvol=weighted.mean(daily_irc_byvol,
                                                                  Quantity_num,
                                                                  na.rm = TRUE),
                               bond_daily_vol=sum(Quantity_num,na.rm = TRUE),
                               count_seq=sequence(.N)),
                         by=c("cusip_id","trd_exctn_dt")]
  
  tmp_irc_cusip_date_vol<-
    tmp_irc_cusip_date_vol[count_seq==1,
                           c("cusip_id","trd_exctn_dt","wave_daily_irc_byvol","bond_grade","bond_daily_vol",ExtraDum),with=FALSE]
  
  tmp_irc_all_date<-
    tmp_irc_cusip_date_vol[,.(wavemean_wave_daily_irc_byvol_all_bps=weighted.mean(wave_daily_irc_byvol,
                                                                                  bond_daily_vol,
                                                                                  na.rm = TRUE)/(0.01/100),
                              mean_wave_daily_irc_byvol_all_bps=mean(wave_daily_irc_byvol,
                                                                     na.rm = TRUE)/(0.01/100)),
                           by=c("trd_exctn_dt")]
  tmp_irc_IG_date<-
    tmp_irc_cusip_date_vol[bond_grade=="I",.(wavemean_wave_daily_irc_byvol_IG_bps=weighted.mean(wave_daily_irc_byvol,
                                                                                                bond_daily_vol,
                                                                                                na.rm = TRUE)/(0.01/100),
                                             mean_wave_daily_irc_byvol_IG_bps=mean(wave_daily_irc_byvol,
                                                                                   na.rm = TRUE)/(0.01/100)),
                           by=c("trd_exctn_dt")]
  
  tmp_irc_HY_date<-
    tmp_irc_cusip_date_vol[bond_grade=="H",.(wavemean_wave_daily_irc_byvol_HY_bps=weighted.mean(wave_daily_irc_byvol,
                                                                                                bond_daily_vol,
                                                                                                na.rm = TRUE)/(0.01/100),
                                             mean_wave_daily_irc_byvol_HY_bps=mean(wave_daily_irc_byvol,
                                                                                   na.rm = TRUE)/(0.01/100)),
                           by=c("trd_exctn_dt")]
  
  ###continue calculating ircs in other dimensions.
  tmp_irc_TTM5yless_date<-
    tmp_irc_cusip_date_vol[dum_TTM5yless==1,.(wavemean_wave_daily_irc_byvol_TTM5yless_bps=weighted.mean(wave_daily_irc_byvol,
                                                                                                        bond_daily_vol,
                                                                                                        na.rm = TRUE)/(0.01/100),
                                              mean_wave_daily_irc_byvol_TTM5yless_bps=mean(wave_daily_irc_byvol,
                                                                                           na.rm = TRUE)/(0.01/100)),
                           by=c("trd_exctn_dt")]
  
  tmp_irc_TTM5ymore_date<-
    tmp_irc_cusip_date_vol[dum_TTM5ymore==1,.(wavemean_wave_daily_irc_byvol_TTM5ymore_bps=weighted.mean(wave_daily_irc_byvol,
                                                                                                        bond_daily_vol,
                                                                                                        na.rm = TRUE)/(0.01/100),
                                              mean_wave_daily_irc_byvol_TTM5ymore_bps=mean(wave_daily_irc_byvol,
                                                                                           na.rm = TRUE)/(0.01/100)),
                           by=c("trd_exctn_dt")]
  
  tmp_irc_US_date<-
    tmp_irc_cusip_date_vol[dum_US==1,.(wavemean_wave_daily_irc_byvol_US_bps=weighted.mean(wave_daily_irc_byvol,
                                                                                          bond_daily_vol,
                                                                                          na.rm = TRUE)/(0.01/100),
                                       mean_wave_daily_irc_byvol_US_bps=mean(wave_daily_irc_byvol,
                                                                             na.rm = TRUE)/(0.01/100)),
                           by=c("trd_exctn_dt")]
  tmp_irc_nonUS_date<-
    tmp_irc_cusip_date_vol[dum_nonUS==1,.(wavemean_wave_daily_irc_byvol_nonUS_bps=weighted.mean(wave_daily_irc_byvol,
                                                                                                bond_daily_vol,
                                                                                                na.rm = TRUE)/(0.01/100),
                                          mean_wave_daily_irc_byvol_nonUS_bps=mean(wave_daily_irc_byvol,
                                                                                   na.rm = TRUE)/(0.01/100)),
                           by=c("trd_exctn_dt")]
  
  #############################################################
  #calculate MIRC for eligible and non-eligible bonds
  tmp_irc_Eligible_date<-
    tmp_irc_cusip_date_vol[(dum_TTM5yless==1)&
                             (bond_grade=="I")&
                             (dum_US==1),.(wavemean_wave_daily_irc_byvol_Eligible_bps=weighted.mean(wave_daily_irc_byvol,
                                                                                                        bond_daily_vol,
                                                                                                        na.rm = TRUE)/(0.01/100),
                                              mean_wave_daily_irc_byvol_Eligible_bps=mean(wave_daily_irc_byvol,
                                                                                           na.rm = TRUE)/(0.01/100)),
                           by=c("trd_exctn_dt")]
  
  tmp_irc_NonEligible_date<-
    tmp_irc_cusip_date_vol[!((dum_TTM5yless==1)&
                             (bond_grade=="I")&
                             (dum_US==1)),.(wavemean_wave_daily_irc_byvol_NonEligible_bps=weighted.mean(wave_daily_irc_byvol,
                                                                                                    bond_daily_vol,
                                                                                                    na.rm = TRUE)/(0.01/100),
                                           mean_wave_daily_irc_byvol_NonEligible_bps=mean(wave_daily_irc_byvol,
                                                                                       na.rm = TRUE)/(0.01/100)),
                           by=c("trd_exctn_dt")]
  #############################################################
  
  tmp_irc_Finance_date<-
    tmp_irc_cusip_date_vol[dum_Finance==1,.(wavemean_wave_daily_irc_byvol_Finance_bps=weighted.mean(wave_daily_irc_byvol,
                                                                                                    bond_daily_vol,
                                                                                                    na.rm = TRUE)/(0.01/100),
                                            mean_wave_daily_irc_byvol_Finance_bps=mean(wave_daily_irc_byvol,
                                                                                       na.rm = TRUE)/(0.01/100)),
                           by=c("trd_exctn_dt")]
  tmp_irc_Utility_date<-
    tmp_irc_cusip_date_vol[dum_Utility==1,.(wavemean_wave_daily_irc_byvol_Utility_bps=weighted.mean(wave_daily_irc_byvol,
                                                                                                    bond_daily_vol,
                                                                                                    na.rm = TRUE)/(0.01/100),
                                            mean_wave_daily_irc_byvol_Utility_bps=mean(wave_daily_irc_byvol,
                                                                                       na.rm = TRUE)/(0.01/100)),
                           by=c("trd_exctn_dt")]
  tmp_irc_Transportation_date<-
    tmp_irc_cusip_date_vol[dum_Transportation==1,.(wavemean_wave_daily_irc_byvol_Transportation_bps=weighted.mean(wave_daily_irc_byvol,
                                                                                                                  bond_daily_vol,
                                                                                                                  na.rm = TRUE)/(0.01/100),
                                                   mean_wave_daily_irc_byvol_Transportation_bps=mean(wave_daily_irc_byvol,
                                                                                                     na.rm = TRUE)/(0.01/100)),
                           by=c("trd_exctn_dt")]
  tmp_irc_OilGas_date<-
    tmp_irc_cusip_date_vol[dum_OilGas==1,.(wavemean_wave_daily_irc_byvol_OilGas_bps=weighted.mean(wave_daily_irc_byvol,
                                                                                                  bond_daily_vol,
                                                                                                  na.rm = TRUE)/(0.01/100),
                                           mean_wave_daily_irc_byvol_OilGas_bps=mean(wave_daily_irc_byvol,
                                                                                     na.rm = TRUE)/(0.01/100)),
                           by=c("trd_exctn_dt")]
  
  
  print(message(paste0("number of obs in tmp_irc_all_date is:")))
  print(nrow(tmp_irc_all_date))
  
  # tmp_irc_all_date <- left_join(tmp_irc_all_date,tmp_irc_IG_date,by=c("trd_exctn_dt"))
  # tmp_irc_all_date <- left_join(tmp_irc_all_date,tmp_irc_HY_date,by=c("trd_exctn_dt"))
  # tmp_irc_all_date <- left_join(tmp_irc_all_date,tmp_irc_TTM5yless_date,by=c("trd_exctn_dt"))
  # tmp_irc_all_date <- left_join(tmp_irc_all_date,tmp_irc_TTM5ymore_date,by=c("trd_exctn_dt"))
  # tmp_irc_all_date <- left_join(tmp_irc_all_date,tmp_irc_US_date,by=c("trd_exctn_dt"))
  # tmp_irc_all_date <- left_join(tmp_irc_all_date,tmp_irc_nonUS_date,by=c("trd_exctn_dt"))
  # tmp_irc_all_date <- left_join(tmp_irc_all_date,tmp_irc_Finance_date,by=c("trd_exctn_dt"))
  # tmp_irc_all_date <- left_join(tmp_irc_all_date,tmp_irc_Utility_date,by=c("trd_exctn_dt"))
  # tmp_irc_all_date <- left_join(tmp_irc_all_date,tmp_irc_OilGas_date,by=c("trd_exctn_dt"))
  # tmp_irc_all_date <- left_join(tmp_irc_all_date,tmp_irc_Transportation_date,by=c("trd_exctn_dt"))
  
  tmp_irc_all_date <- as.data.table(tmp_irc_all_date)
  
  
  print(nrow(tmp_irc_all_date))
  

  ######################################################################################
  #continue identifying agency trades
  #round-2: unmatch_TMP_C2D and TMP_D2D
  TMP_Agency_2 <- data.table(sqldf('select a.C2D_ID, b.D2D_ID,
                                 b.new_datetime_exctn-a.new_datetime_exctn as timediff
                                 from unmatch_TMP_C2D as a, TMP_D2D as b
                                 where a.cusip_id=b.cusip_id and a.trd_exctn_dt=b.trd_exctn_dt and a.Quantity=b.Quantity and timediff<=900 and timediff>=0
                                 order by C2D_ID, timediff'))
  ##############################################################
  ###update01/03/2021: relax equal quantity restriction, to allow absolute difference not bigger than 0.1
  # TMP_Agency_2 <- data.table(sqldf('select a.C2D_ID, b.D2D_ID,
  #                                b.new_datetime_exctn-a.new_datetime_exctn as timediff,
  #                                abs(a.Quantity_num-b.Quantity_num) as quantityabsdiff
  #                                from unmatch_TMP_C2D as a, TMP_D2D as b
  #                                where a.cusip_id=b.cusip_id and a.trd_exctn_dt=b.trd_exctn_dt and quantityabsdiff<0.1 and timediff<=900 and timediff>=0
  #                                order by C2D_ID, timediff'))
  ##############################################################
  
  nrow(TMP_Agency_2)
  TMP_Agency_2[,`:=`(count_seq=sequence(.N)),
               by=c("C2D_ID")]
  TMP_Agency_2<-TMP_Agency_2[count_seq==1]
  nrow(TMP_Agency_2)
  
  match_TMP_C2D<-rbind(match_TMP_C2D,unmatch_TMP_C2D[C2D_ID %in% as.vector(TMP_Agency_2$C2D_ID)])
  unmatch_TMP_C2D<-unmatch_TMP_C2D[!(C2D_ID %in% as.vector(TMP_Agency_2$C2D_ID))]
  match_TMP_D2D<-TMP_D2D[D2D_ID %in% as.vector(TMP_Agency_2$D2D_ID)]
  unmatch_TMP_D2D<-TMP_D2D[!(D2D_ID %in% as.vector(TMP_Agency_2$D2D_ID))]
  
  #round-3: unmatch_TMP_D2C and unmatch_TMP_D2D
  setnames(unmatch_TMP_D2C,
           c("cusip_id_D2C","trd_exctn_dt_D2C", "new_datetime_exctn_D2C", "rptd_pr_D2C",  "Quantity_num_D2C", "Quantity_D2C",      
             "bond_grade_D2C",paste0(ExtraDum,"_D2C")),
           c("cusip_id","trd_exctn_dt", "new_datetime_exctn", "rptd_pr",  "Quantity_num", "Quantity",      
             "bond_grade",ExtraDum))
  setnames(match_TMP_D2C,
           c("cusip_id_D2C","trd_exctn_dt_D2C", "new_datetime_exctn_D2C", "rptd_pr_D2C",  "Quantity_num_D2C", "Quantity_D2C",     
             "bond_grade_D2C",paste0(ExtraDum,"_D2C")),
           c("cusip_id","trd_exctn_dt", "new_datetime_exctn", "rptd_pr",  "Quantity_num", "Quantity",       
             "bond_grade",ExtraDum))
  TMP_Agency_3 <- data.table(sqldf('select a.D2D_ID, b.D2C_ID,
                                 b.new_datetime_exctn-a.new_datetime_exctn as timediff
                                 from unmatch_TMP_D2D as a, unmatch_TMP_D2C as b
                                 where a.cusip_id=b.cusip_id and a.trd_exctn_dt=b.trd_exctn_dt and a.Quantity=b.Quantity and timediff<=900 and timediff>=0
                                 order by D2D_ID, timediff'))
  
  # TMP_Agency_3 <- data.table(sqldf('select a.D2D_ID, b.D2C_ID,
  #                                b.new_datetime_exctn-a.new_datetime_exctn as timediff,
  #                                abs(a.Quantity_num-b.Quantity_num) as quantityabsdiff
  #                                from unmatch_TMP_D2D as a, unmatch_TMP_D2C as b
  #                                where a.cusip_id=b.cusip_id and a.trd_exctn_dt=b.trd_exctn_dt and quantityabsdiff<0.1 and timediff<=900 and timediff>=0
  #                                order by D2D_ID, timediff'))
  
  nrow(TMP_Agency_3)
  TMP_Agency_3[,`:=`(count_seq=sequence(.N)),
               by=c("D2D_ID")]
  TMP_Agency_3<-TMP_Agency_3[count_seq==1]
  nrow(TMP_Agency_3)
  
  match_TMP_D2C<-rbind(match_TMP_D2C,unmatch_TMP_D2C[D2C_ID %in% as.vector(TMP_Agency_3$D2C_ID)],fill=TRUE)
  unmatch_TMP_D2C<-unmatch_TMP_D2C[!(D2C_ID %in% as.vector(TMP_Agency_3$D2C_ID))]
  match_TMP_D2D<-rbind(match_TMP_D2D,unmatch_TMP_D2D[D2D_ID %in% as.vector(TMP_Agency_3$D2D_ID)],fill=TRUE)
  unmatch_TMP_D2D<-unmatch_TMP_D2D[!(D2D_ID %in% as.vector(TMP_Agency_3$D2D_ID))]
  
  nrow(match_TMP_C2D)+nrow(unmatch_TMP_C2D)
  nrow(TMP_C2D)
  nrow(match_TMP_D2C)+nrow(unmatch_TMP_D2C)
  nrow(TMP_D2C)
  nrow(match_TMP_D2D)+nrow(unmatch_TMP_D2D)
  nrow(TMP_D2D)
  
  ###for next three rounds, relax par value equality restriction.
  #round-4:unmatch_TMP_C2D and unmatch_TMP_D2C
  TMP_Agency_4 <- data.table(sqldf('select a.C2D_ID, b.D2C_ID,
                                 b.new_datetime_exctn-a.new_datetime_exctn as timediff
                                 from unmatch_TMP_C2D as a, unmatch_TMP_D2C as b
                                 where a.cusip_id=b.cusip_id and a.trd_exctn_dt=b.trd_exctn_dt and a.Quantity_num>=b.Quantity_num and timediff<=900 and timediff>=0
                                 order by C2D_ID, timediff'))
  nrow(TMP_Agency_4)
  TMP_Agency_4[,`:=`(count_seq=sequence(.N)),
               by=c("C2D_ID")]
  TMP_Agency_4<-TMP_Agency_4[count_seq==1]
  nrow(TMP_Agency_4)
  
  match_TMP_C2D<-rbind(match_TMP_C2D,unmatch_TMP_C2D[C2D_ID %in% as.vector(TMP_Agency_4$C2D_ID)])
  unmatch_TMP_C2D<-unmatch_TMP_C2D[!(C2D_ID %in% as.vector(TMP_Agency_4$C2D_ID))]
  match_TMP_D2C<-rbind(match_TMP_D2C,unmatch_TMP_D2C[D2C_ID %in% as.vector(TMP_Agency_4$D2C_ID)])
  unmatch_TMP_D2C<-unmatch_TMP_D2C[!(D2C_ID %in% as.vector(TMP_Agency_4$D2C_ID))]
  
  #round-5: unmatch_TMP_C2D and unmatch_TMP_D2D
  TMP_Agency_5 <- data.table(sqldf('select a.C2D_ID, b.D2D_ID,
                                 b.new_datetime_exctn-a.new_datetime_exctn as timediff
                                 from unmatch_TMP_C2D as a, unmatch_TMP_D2D as b
                                 where a.cusip_id=b.cusip_id and a.trd_exctn_dt=b.trd_exctn_dt and a.Quantity_num>=b.Quantity_num and timediff<=900 and timediff>=0
                                 order by C2D_ID, timediff'))
  
  nrow(TMP_Agency_5)
  TMP_Agency_5[,`:=`(count_seq=sequence(.N)),
               by=c("C2D_ID")]
  TMP_Agency_5<-TMP_Agency_5[count_seq==1]
  nrow(TMP_Agency_5)
  
  match_TMP_C2D<-rbind(match_TMP_C2D,unmatch_TMP_C2D[C2D_ID %in% as.vector(TMP_Agency_5$C2D_ID)])
  unmatch_TMP_C2D<-unmatch_TMP_C2D[!(C2D_ID %in% as.vector(TMP_Agency_5$C2D_ID))]
  match_TMP_D2D<-rbind(match_TMP_D2D,unmatch_TMP_D2D[D2D_ID %in% as.vector(TMP_Agency_5$D2D_ID)])
  unmatch_TMP_D2D<-unmatch_TMP_D2D[!(D2D_ID %in% as.vector(TMP_Agency_5$D2D_ID))]
  
  #round-6:unmatch_TMP_D2C and unmatch_TMP_D2D
  TMP_Agency_6 <- data.table(sqldf('select a.D2D_ID, b.D2C_ID,
                                 b.new_datetime_exctn-a.new_datetime_exctn as timediff
                                 from unmatch_TMP_D2D as a, unmatch_TMP_D2C as b
                                 where a.cusip_id=b.cusip_id and a.trd_exctn_dt=b.trd_exctn_dt and a.Quantity_num<=b.Quantity_num and timediff<=900 and timediff>=0
                                 order by D2D_ID, timediff'))
  nrow(TMP_Agency_6)
  TMP_Agency_6[,`:=`(count_seq=sequence(.N)),
               by=c("D2D_ID")]
  TMP_Agency_6<-TMP_Agency_6[count_seq==1]
  nrow(TMP_Agency_6)
  
  match_TMP_D2C<-rbind(match_TMP_D2C,unmatch_TMP_D2C[D2C_ID %in% as.vector(TMP_Agency_6$D2C_ID)])
  unmatch_TMP_D2C<-unmatch_TMP_D2C[!(D2C_ID %in% as.vector(TMP_Agency_6$D2C_ID))]
  match_TMP_D2D<-rbind(match_TMP_D2D,unmatch_TMP_D2D[D2D_ID %in% as.vector(TMP_Agency_6$D2D_ID)])
  unmatch_TMP_D2D<-unmatch_TMP_D2D[!(D2D_ID %in% as.vector(TMP_Agency_6$D2D_ID))]
  
  nrow(match_TMP_C2D)+nrow(unmatch_TMP_C2D)
  nrow(TMP_C2D)
  nrow(match_TMP_D2C)+nrow(unmatch_TMP_D2C)
  nrow(TMP_D2C)
  nrow(match_TMP_D2D)+nrow(unmatch_TMP_D2D)
  nrow(TMP_D2D)
  
  
  match_ID=c(c(match_TMP_C2D$C2D_ID),c(match_TMP_D2D$D2D_ID),c(match_TMP_D2C$D2C_ID))
  
  TradeData_filter_masterfundamental[,`:=`(capacity="")]
  TradeData_filter_masterfundamental[(ID %in% match_ID),
                                     `:=`(capacity="A")]
  TradeData_filter_masterfundamental[!(ID %in% match_ID),
                                     `:=`(capacity="P")]
  print(table(TradeData_filter_masterfundamental$capacity))
  print(nrow(match_TMP_C2D)+nrow(match_TMP_D2D)+nrow(match_TMP_D2C))
  
  TradeData_filter_masterfundamental[remuneration=="C",
                                     `:=`(capacity="A")]
  table(TradeData_filter_masterfundamental$capacity)


  setnames(TradeData_filter_masterfundamental,
           c("Quantity","Quantity_num"),
           c("Quantity(ParVal)","Quantity(ParVal)_num"))
  
  print(nrow(TradeData_filter_masterfundamental))
  
  ###Create new data table to store all transactions
  TradeData_filter_masterfundamental_allcapacity<-
    TradeData_filter_masterfundamental
  
  print(nrow(TradeData_filter_masterfundamental_allcapacity))
  print(table(TradeData_filter_masterfundamental_allcapacity$capacity))
  
  ###exclude agency transactions after calculating IRCs
  
  TradeData_filter_masterfundamental<-
    TradeData_filter_masterfundamental[capacity=="P"]
  
  print(nrow(TradeData_filter_masterfundamental))
  print(message(paste0("distribution of capacity in TradeData_filter_masterfundamental is :")))
  print(table(TradeData_filter_masterfundamental$capacity))
  print(message(paste0("distribution of capacity in TradeData_filter_masterfundamental_allcapacity is :")))
  print(table(TradeData_filter_masterfundamental_allcapacity$capacity))
  
  ###[2]We need to only use secondary market transactions, i.e. time_to_offering > 90 (we temporarily maintain those trades with time_to_offering as NA)
  print(summary(TradeData_filter_masterfundamental$time_to_offering))
  
  print(nrow(TradeData_filter_masterfundamental))
  
  TradeData_filter_masterfundamental<-
    TradeData_filter_masterfundamental[time_to_offering>=90|is.na(time_to_offering)]
  
  print(nrow(TradeData_filter_masterfundamental))
  
  ###Do the same for TradeData_filter_masterfundamental_allcapacity (although in TradeData_filter_masterfundamental, minimum time_to_offering is 125 days)
  print(summary(TradeData_filter_masterfundamental_allcapacity$time_to_offering))
  
  print(nrow(TradeData_filter_masterfundamental_allcapacity))
  
  TradeData_filter_masterfundamental_allcapacity<-
    TradeData_filter_masterfundamental_allcapacity[time_to_offering>=90|is.na(time_to_offering)]
  
  print(nrow(TradeData_filter_masterfundamental_allcapacity))
  
  ###CH measure
  
  TradeData_filter_masterfundamental[dum_dealer_to_customer==1, customer_buy_indic:=1]
  TradeData_filter_masterfundamental[dum_customer_to_dealer==1, customer_buy_indic:=-1]
  
  #################################
  # calcualte the reference price #
  #################################
  # For each customer trade, calculate its reference price as the 
  # volume-weighted average price of interdealer trades larger than $100,000 
  # in the same bond-day, excluding interdealer trades executed within 15 minutes.
  # $100 = par value, and the volume traded (in dollars of par). 
  
  
  # TradeData_filter_masterfundamental<-fread("INTERMEDIATESAVE_TradeData_filter_masterfundamental.csv")
  
  # TMP_C <- TradeData_filter_masterfundamental[dum_interdealer==0,.(cusip_id,trd_exctn_dt,new_datetime_exctn,`Quantity(ParVal)_num`,rptd_pr,
  #                                                                  ID)]
  # TMP_D <- TradeData_filter_masterfundamental[dum_interdealer==1 & `Quantity(ParVal)_num` >100e3,
  #                                             .(cusip_id,trd_exctn_dt,new_datetime_exctn, `Quantity(ParVal)_num` ,rptd_pr, ID)]
  
  TMP_C <- TradeData_filter_masterfundamental[dum_interdealer==0,c("cusip_id","trd_exctn_dt","new_datetime_exctn","rptd_pr","Quantity(ParVal)_num","bond_grade",
                                                                   "ID",ExtraDum),
                                              with=FALSE]
  
  TMP_D <- TradeData_filter_masterfundamental[dum_interdealer==1 & `Quantity(ParVal)_num` >100e3,
                                              c("cusip_id","trd_exctn_dt","new_datetime_exctn","rptd_pr","Quantity(ParVal)_num","bond_grade",
                                                "ID",ExtraDum),
                                              with=FALSE]
  
  setnames(TMP_D,c("ID"),c("DD_ID"))
  setnames(TMP_C,c("ID"),c("DC_ID"))
  
  setnames(TMP_D,c("cusip_id", "trd_exctn_dt", "new_datetime_exctn", "Quantity(ParVal)_num","rptd_pr","bond_grade",ExtraDum),
           c(paste0(c("cusip_id", "trd_exctn_dt", "new_datetime_exctn", "Quantity(ParVal)_num","rptd_pr","bond_grade",ExtraDum),"_DD")))
  
  TMP_M <- data.table(sqldf('select *
                          from TMP_C as a, TMP_D as b
                          where a.cusip_id=b.cusip_id_DD and a.trd_exctn_dt=b.trd_exctn_dt_DD and (a.new_datetime_exctn>=b.new_datetime_exctn_DD+900 or a.new_datetime_exctn<=b.new_datetime_exctn_DD-900)
                          order by a.cusip_id, a.trd_exctn_dt, a.new_datetime_exctn'))
  
  # TMP_M <- data.table(sqldf('select *
  #                           from TMP_D as a, TMP_C as b
  #                           where a.cusip_id=b.cusip_id and a.trd_exctn_dt=b.trd_exctn_dt and (a.new_datetime_exctn>=b.new_datetime_exctn+900 or a.new_datetime_exctn<=b.new_datetime_exctn-900)
  #                           order by a.cusip_id, a.trd_exctn_dt, a.new_datetime_exctn'))
  
  nrow(TMP_M)
  # TMP_M[,`:=`(count_seq=sequence(.N)),
  #       by=c("DD_ID")]
  TMP_M[,`:=`(count_seq=sequence(.N)),
        by=c("DC_ID")]
  # TMP_M<-TMP_M[count_seq==1]
  # nrow(TMP_M)
  
  # TMP_M[,`:=`(new_datetime_exctn_DC=as.POSIXct(new_datetime_exctn..10, origin = "1970-01-01 00:00:00"))]
  
  # TMP_M[,ref_pr:=weighted.mean(rptd_pr, w=`Quantity(ParVal)_num`), by=.(trd_exctn_dt,cusip_id)]
  
  TMP_M[,ref_pr:=weighted.mean(rptd_pr_DD, w=`Quantity(ParVal)_num_DD`,na.rm = TRUE), by=.(DC_ID)]
  
  rm(TMP_D)
  
  # ref_prices <- unique(TMP_M[,.(trd_exctn_dt,cusip_id,ref_pr)])
  ref_prices <- TMP_M[count_seq==1,c("ref_pr","DC_ID"),
                      with=FALSE]
  
  setnames(ref_prices,c("DC_ID"),c("ID"))
  
  print(nrow(TradeData_filter_masterfundamental))
  
  TradeData_filter_masterfundamental<-
    left_join(TradeData_filter_masterfundamental,
              ref_prices,
              by=c("ID"))
  
  print(nrow(TradeData_filter_masterfundamental))
  
  ###there are many transactions with no-reference prices.
  print(paste0("calculated reference price is:"))
  print(summary(TradeData_filter_masterfundamental$ref_pr))
  
  #################################
  # calcualte spread1 in Choi-Huh #
  #################################
  # All bid-ask spread measures are calculated using trades with par values of $1 million and above
  # trade-level
  TradeData_filter_masterfundamental<-as.data.table(TradeData_filter_masterfundamental)
  TradeData_filter_masterfundamental[`Quantity(ParVal)_num`>= 1000000,
                                     spread1:=2*customer_buy_indic*(rptd_pr/ref_pr-1)]
  # winsorized at the 1% level each day
  TradeData_filter_masterfundamental[,
                                     `:=`(high1pt_spread1=quantile(spread1,probs = 0.99,
                                                                   na.rm = TRUE),
                                          low1pt_spread1=quantile(spread1,probs = 0.01,
                                                                  na.rm = TRUE)),
                                     by=c("trd_exctn_dt")]
  
  TradeData_filter_masterfundamental[spread1<low1pt_spread1,
                                     `:=`(spread1=low1pt_spread1)]
  TradeData_filter_masterfundamental[spread1>high1pt_spread1,
                                     `:=`(spread1=high1pt_spread1)]
  
  # bond-day-level
  TradeData_filter_masterfundamental[,
                                     spread1_bondday:=weighted.mean(spread1, w = `Quantity(ParVal)_num`, na.rm = TRUE), by=.(trd_exctn_dt,cusip_id)]
  # winsorized at the 1% level each day
  TradeData_filter_masterfundamental[,
                                     `:=`(high1pt_spread1_bondday=quantile(spread1_bondday,probs = 0.99,
                                                                           na.rm = TRUE),
                                          low1pt_spread1_bondday=quantile(spread1_bondday,probs = 0.01,
                                                                          na.rm = TRUE)),
                                     by=c("trd_exctn_dt")]
  
  TradeData_filter_masterfundamental[spread1_bondday<low1pt_spread1_bondday,
                                     `:=`(spread1_bondday=low1pt_spread1_bondday)]
  TradeData_filter_masterfundamental[spread1_bondday>high1pt_spread1_bondday,
                                     `:=`(spread1_bondday=high1pt_spread1_bondday)]
  
  ###calcualte spread_CH
  TradeData_filter_masterfundamental[,
                                     spread_CH:=2*customer_buy_indic*(rptd_pr/ref_pr-1)]
  
  # bond-day-level
  TradeData_filter_masterfundamental[,
                                     spread_CH_bondday:=weighted.mean(spread_CH, w = `Quantity(ParVal)_num`, 
                                                                      na.rm = TRUE), by=.(trd_exctn_dt,cusip_id)]
  # winsorized at the 1% level each day
  print(paste0("distribution of spread_CH_bondday for is: "))
  print(summary(TradeData_filter_masterfundamental$spread_CH_bondday))
  TradeData_filter_masterfundamental[,
                                     `:=`(high1pt_spread_CH_bondday=quantile(spread_CH_bondday,probs = 0.99,
                                                                             na.rm = TRUE),
                                          low1pt_spread_CH_bondday=quantile(spread_CH_bondday,probs = 0.01,
                                                                            na.rm = TRUE)),
                                     by=c("trd_exctn_dt")]
  
  TradeData_filter_masterfundamental[spread_CH_bondday<low1pt_spread_CH_bondday,
                                     `:=`(spread_CH_bondday=low1pt_spread_CH_bondday)]
  TradeData_filter_masterfundamental[spread_CH_bondday>high1pt_spread_CH_bondday,
                                     `:=`(spread_CH_bondday=high1pt_spread_CH_bondday)]
  
  print(summary(TradeData_filter_masterfundamental$spread_CH_bondday))
  
  
  #########################################
  # calcualte same_day spread in Choi-Huh #
  #########################################
  TradeData_filter_masterfundamental[`Quantity(ParVal)_num`>= 1000000 & dum_dealer_to_customer==1,
                                     same_day_num1:=weighted.mean(rptd_pr, w = `Quantity(ParVal)_num`, na.rm = TRUE), by=.(trd_exctn_dt,cusip_id)]
  
  TradeData_filter_masterfundamental[`Quantity(ParVal)_num`>= 1000000 & dum_customer_to_dealer==1,
                                     same_day_num2:=weighted.mean(rptd_pr, w = `Quantity(ParVal)_num`, na.rm = TRUE), by=.(trd_exctn_dt,cusip_id)]
  
  TradeData_filter_masterfundamental[,
                                     same_day_num:=2*(unique(na.omit(same_day_num1)) - unique(na.omit(same_day_num2))), by=.(trd_exctn_dt,cusip_id)]
  TradeData_filter_masterfundamental[,
                                     same_day_denom:=unique(na.omit(same_day_num1)) + unique(na.omit(same_day_num2)),by=.(trd_exctn_dt,cusip_id)]
  TradeData_filter_masterfundamental[,
                                     same_day:=same_day_num/same_day_denom, by=.(trd_exctn_dt,cusip_id)]
  
  # winsorized at the 1% level each day
  TradeData_filter_masterfundamental[,
                                     `:=`(high1pt_same_day=quantile(same_day,probs = 0.99,
                                                                    na.rm = TRUE),
                                          low1pt_same_day=quantile(same_day,probs = 0.01,
                                                                   na.rm = TRUE)),
                                     by=c("trd_exctn_dt")]
  
  TradeData_filter_masterfundamental[same_day<low1pt_same_day,
                                     `:=`(same_day=low1pt_same_day)]
  TradeData_filter_masterfundamental[same_day>high1pt_same_day,
                                     `:=`(same_day=high1pt_same_day)]
  
  
  ###Bond_day level only
  # TradeData_filter_masterfundamental[,`:=`(count_seq=sequence(.N),
  #                                          bondday_totvol=sum(`Quantity(ParVal)_num`,na.rm = TRUE)),
  #                                    by=c("trd_exctn_dt","cusip_id")]
  
  TradeData_filter_masterfundamental[,`:=`(bondday_totvol=sum(`Quantity(ParVal)_num`,na.rm = TRUE)),
                                     by=c("trd_exctn_dt","cusip_id")]
  
  TradeData_filter_masterfundamental[,`:=`(bondday_totvol_customer=sum(`Quantity(ParVal)_num`*dum_customer,na.rm = TRUE)),
                                     by=c("trd_exctn_dt","cusip_id")]
  
  TradeData_filter_masterfundamental[!is.na(spread1_bondday),`:=`(bondday_totvol_withnonnanspread1=sum(`Quantity(ParVal)_num`,na.rm = TRUE)),
                                     by=c("trd_exctn_dt","cusip_id")]
  
  TradeData_filter_masterfundamental[!is.na(spread_CH_bondday),`:=`(count_seq=sequence(.N),
                                                                    bondday_totvol_withnonnanspreadCH=sum(`Quantity(ParVal)_num`,na.rm = TRUE)),
                                     by=c("trd_exctn_dt","cusip_id")]

  
  TradeData_filter_masterfundamental_bondday<-TradeData_filter_masterfundamental[count_seq==1]
  
  
  # calcualte mean monthly spreads for all, IG, HY, TTM5yless, TTM5ymore bonds
  tmp1 <- TradeData_filter_masterfundamental_bondday[!is.nan(spread1_bondday) ,
                                                     .(mean_sprd1_all_bps=mean(spread1_bondday,na.rm = TRUE)/(0.01/100),
                                                       wavemean_sprd1_all_bps=weighted.mean(spread1_bondday,
                                                                                            bondday_totvol_withnonnanspread1,
                                                                                            na.rm = TRUE)/(0.01/100)), by=c("trd_exctn_dt")]
  
  tmp2 <- TradeData_filter_masterfundamental_bondday[ ,.(mean_same_day_all=mean(same_day,na.rm = TRUE)/(0.01/100),
                                                         wavemean_same_day_all=weighted.mean(same_day,
                                                                                             bondday_totvol_customer,
                                                                                             na.rm = TRUE)/(0.01/100)), by=c("trd_exctn_dt")]
  
  tmp3 <- TradeData_filter_masterfundamental_bondday[bond_grade=="I",.(mean_sprd1_IG_bps=mean(spread1_bondday,na.rm = TRUE)/(0.01/100),
                                                                       mean_same_day_IG=mean(same_day,na.rm = TRUE)/(0.01/100),
                                                                       wavemean_sprd1_IG_bps=weighted.mean(spread1_bondday,bondday_totvol_withnonnanspread1,na.rm = TRUE)/(0.01/100),
                                                                       wavemean_same_day_IG=weighted.mean(same_day,bondday_totvol_customer,na.rm = TRUE)/(0.01/100)), by=c("trd_exctn_dt")]
  
  
  tmp3_2 <- TradeData_filter_masterfundamental_bondday[dum_TTM5yless==1, 
                                                       .(mean_sprd1_TTM5yless_bps=mean(spread1_bondday,na.rm = TRUE)/(0.01/100),
                                                         mean_same_day_TTM5yless=mean(same_day,na.rm = TRUE)/(0.01/100),
                                                         wavemean_sprd1_TTM5yless_bps=weighted.mean(spread1_bondday,bondday_totvol_withnonnanspread1,na.rm = TRUE)/(0.01/100),
                                                         wavemean_same_day_TTM5yless=weighted.mean(same_day,bondday_totvol_customer,na.rm = TRUE)/(0.01/100)), by=c("trd_exctn_dt")]
  
  tmp3_3 <- TradeData_filter_masterfundamental_bondday[bond_grade=="I"&dum_TTM5yless==1,
                                                       .(mean_sprd1_IG_TTM5yless_bps=mean(spread1_bondday,na.rm = TRUE)/(0.01/100),
                                                         mean_same_day_IG_TTM5yless=mean(same_day,na.rm = TRUE)/(0.01/100)), by=c("trd_exctn_dt")]
  tmp3_4 <- TradeData_filter_masterfundamental_bondday[bond_grade=="I"&dum_TTM5ymore==1,
                                                       .(mean_sprd1_IG_TTM5ymore_bps=mean(spread1_bondday,na.rm = TRUE)/(0.01/100),
                                                         mean_same_day_IG_TTM5ymore=mean(same_day,na.rm = TRUE)/(0.01/100)), by=c("trd_exctn_dt")]
  
  tmp3_5 <- TradeData_filter_masterfundamental_bondday[dum_US==1, 
                                                       .(mean_sprd1_US_bps=mean(spread1_bondday,na.rm = TRUE)/(0.01/100),
                                                         mean_same_day_US=mean(same_day,na.rm = TRUE)/(0.01/100),
                                                         wavemean_sprd1_US_bps=weighted.mean(spread1_bondday,bondday_totvol_withnonnanspread1,na.rm = TRUE)/(0.01/100),
                                                         wavemean_same_day_US=weighted.mean(same_day,bondday_totvol_customer,na.rm = TRUE)/(0.01/100)), by=c("trd_exctn_dt")]
  
  tmp3_6 <- TradeData_filter_masterfundamental_bondday[dum_nonUS==1, 
                                                       .(mean_sprd1_nonUS_bps=mean(spread1_bondday,na.rm = TRUE)/(0.01/100),
                                                         mean_same_day_nonUS=mean(same_day,na.rm = TRUE)/(0.01/100),
                                                         wavemean_sprd1_nonUS_bps=weighted.mean(spread1_bondday,bondday_totvol_withnonnanspread1,na.rm = TRUE)/(0.01/100),
                                                         wavemean_same_day_nonUS=weighted.mean(same_day,bondday_totvol_customer,na.rm = TRUE)/(0.01/100)), by=c("trd_exctn_dt")]
  
  tmp3_7 <- TradeData_filter_masterfundamental_bondday[dum_Finance==1, 
                                                       .(mean_sprd1_Finance_bps=mean(spread1_bondday,na.rm = TRUE)/(0.01/100),
                                                         mean_same_day_Finance=mean(same_day,na.rm = TRUE)/(0.01/100),
                                                         wavemean_sprd1_Finance_bps=weighted.mean(spread1_bondday,bondday_totvol_withnonnanspread1,na.rm = TRUE)/(0.01/100),
                                                         wavemean_same_day_Finance=weighted.mean(same_day,bondday_totvol_customer,na.rm = TRUE)/(0.01/100)), by=c("trd_exctn_dt")]
  tmp3_8 <- TradeData_filter_masterfundamental_bondday[dum_Utility==1, 
                                                       .(mean_sprd1_Utility_bps=mean(spread1_bondday,na.rm = TRUE)/(0.01/100),
                                                         mean_same_day_Utility=mean(same_day,na.rm = TRUE)/(0.01/100),
                                                         wavemean_sprd1_Utility_bps=weighted.mean(spread1_bondday,bondday_totvol_withnonnanspread1,na.rm = TRUE)/(0.01/100),
                                                         wavemean_same_day_Utility=weighted.mean(same_day,bondday_totvol_customer,na.rm = TRUE)/(0.01/100)), by=c("trd_exctn_dt")]
  tmp3_9 <- TradeData_filter_masterfundamental_bondday[dum_Transportation==1, 
                                                       .(mean_sprd1_Transportation_bps=mean(spread1_bondday,na.rm = TRUE)/(0.01/100),
                                                         mean_same_day_Transportation=mean(same_day,na.rm = TRUE)/(0.01/100),
                                                         wavemean_sprd1_Transportation_bps=weighted.mean(spread1_bondday,bondday_totvol_withnonnanspread1,na.rm = TRUE)/(0.01/100),
                                                         wavemean_same_day_Transportation=weighted.mean(same_day,bondday_totvol_customer,na.rm = TRUE)/(0.01/100)), by=c("trd_exctn_dt")]
  tmp3_10 <- TradeData_filter_masterfundamental_bondday[dum_OilGas==1, 
                                                        .(mean_sprd1_OilGas_bps=mean(spread1_bondday,na.rm = TRUE)/(0.01/100),
                                                          mean_same_day_OilGas=mean(same_day,na.rm = TRUE)/(0.01/100),
                                                          wavemean_sprd1_OilGas_bps=weighted.mean(spread1_bondday,bondday_totvol_withnonnanspread1,na.rm = TRUE)/(0.01/100),
                                                          wavemean_same_day_OilGas=weighted.mean(same_day,bondday_totvol_customer,na.rm = TRUE)/(0.01/100)), by=c("trd_exctn_dt")]
  
  tmp3_11 <- TradeData_filter_masterfundamental_bondday[bond_grade=="H"&dum_TTM5yless==1, 
                                                        .(mean_sprd1_HY_TTM5yless_bps=mean(spread1_bondday,na.rm = TRUE)/(0.01/100),
                                                          mean_same_day_HY_TTM5yless=mean(same_day,na.rm = TRUE)/(0.01/100)), by=c("trd_exctn_dt")]
  tmp3_12 <- TradeData_filter_masterfundamental_bondday[bond_grade=="H"&dum_TTM5ymore==1, 
                                                        .(mean_sprd1_HY_TTM5ymore_bps=mean(spread1_bondday,na.rm = TRUE)/(0.01/100),
                                                          mean_same_day_HY_TTM5ymore=mean(same_day,na.rm = TRUE)/(0.01/100)), by=c("trd_exctn_dt")]
  
  tmp3_13 <- TradeData_filter_masterfundamental_bondday[bond_grade=="H",.(mean_sprd1_HY_bps=mean(spread1_bondday,na.rm = TRUE)/(0.01/100),
                                                                          mean_same_day_HY=mean(same_day,na.rm = TRUE)/(0.01/100),
                                                                          wavemean_sprd1_HY_bps=weighted.mean(spread1_bondday,bondday_totvol_withnonnanspread1,na.rm = TRUE)/(0.01/100),
                                                                          wavemean_same_day_HY=weighted.mean(same_day,bondday_totvol_customer,na.rm = TRUE)/(0.01/100)), by=c("trd_exctn_dt")]
  
  tmp3_14 <- TradeData_filter_masterfundamental_bondday[dum_TTM5ymore==1, 
                                                        .(mean_sprd1_TTM5ymore_bps=mean(spread1_bondday,na.rm = TRUE)/(0.01/100),
                                                          mean_same_day_TTM5ymore=mean(same_day,na.rm = TRUE)/(0.01/100),
                                                          wavemean_sprd1_TTM5ymore_bps=weighted.mean(spread1_bondday,bondday_totvol_withnonnanspread1,na.rm = TRUE)/(0.01/100),
                                                          wavemean_same_day_TTM5ymore=weighted.mean(same_day,bondday_totvol_customer,na.rm = TRUE)/(0.01/100)), by=c("trd_exctn_dt")]
  
  ###########################################################
  # calcualte the Roll measure in Bao, Pan, and Wang (2011) #
  ###########################################################
  ###Calculate Roll only using DC trades
  TMP_C<-TMP_C[order(cusip_id,new_datetime_exctn)]
  TMP_C[,num_trades_day:=.N,by=.(cusip_id, trd_exctn_dt)]
  TMP_C[,`:=`(shift_rptd_pr=shift(rptd_pr,n=as.integer(num_trades_day[1])-1,type="lead")),by=.(cusip_id,trd_exctn_dt)]
  TMP_C[,`:=`(count_seq=sequence(.N)),by=.(cusip_id,trd_exctn_dt)]
  TMP_C[count_seq==1,`:=`(shift_rptd_pr=NA)]
  setcolorder(TMP_C,c("rptd_pr","shift_rptd_pr"))
  
  TMP_C[num_trades_day>=10,dlogp:=log(rptd_pr)-log(shift_rptd_pr), by=.(cusip_id,trd_exctn_dt)]
  TMP_C[,lag_dlogp:=shift(dlogp,n=as.integer(num_trades_day[1])-1,type="lead"),by=.(cusip_id,trd_exctn_dt)]
  TMP_C[,`:=`(dlogp=as.numeric(dlogp),lag_dlogp=as.numeric(lag_dlogp))]
  TMP_C[count_seq==1,`:=`(lag_dlogp=NA)]
  setcolorder(TMP_C,c("dlogp","lag_dlogp"))
  
  Roll_meas_TMP_C <- TMP_C[!is.na(dlogp) & !is.na(lag_dlogp),`:=`(Roll=-cov(dlogp,lag_dlogp),
                                                                  bondday_totvol=sum(`Quantity(ParVal)_num`,na.rm = TRUE),
                                                                  count_seq=sequence(.N)),
                           by=c("trd_exctn_dt","cusip_id")]
  
  Roll_meas_TMP_C <- Roll_meas_TMP_C[count_seq==1,c("cusip_id","trd_exctn_dt","bond_grade",ExtraDum,
                                                    "Roll","bondday_totvol"),with=FALSE]
  
  Roll_meas_TMP_C <- Roll_meas_TMP_C[!is.na(Roll) & Roll>0]
  Roll_meas_TMP_C[,BPW:=Roll][,Roll:=2*sqrt(Roll)]
  
  tmp_Roll_meas_TMP_C<-Roll_meas_TMP_C[,.(wavemean_Roll_DCtrades=weighted.mean(Roll,bondday_totvol,na.rm = TRUE),
                                          wavemean_BPW_DCtrades=weighted.mean(BPW,bondday_totvol,na.rm = TRUE)),
                                       by=.(trd_exctn_dt)]
  
  tmp_Roll_meas_IG_TMP_C<-Roll_meas_TMP_C[bond_grade=="I",.(wavemean_Roll_DCtrades_IG=weighted.mean(Roll,bondday_totvol,na.rm = TRUE),
                                                            wavemean_BPW_DCtrades_IG=weighted.mean(BPW,bondday_totvol,na.rm = TRUE)),
                                          by=.(trd_exctn_dt)]
  
  tmp_Roll_meas_HY_TMP_C<-Roll_meas_TMP_C[bond_grade=="H",.(wavemean_Roll_DCtrades_HY=weighted.mean(Roll,bondday_totvol,na.rm = TRUE),
                                                            wavemean_BPW_DCtrades_HY=weighted.mean(BPW,bondday_totvol,na.rm = TRUE)),
                                          by=.(trd_exctn_dt)]
  
  tmp_Roll_meas_TTM5yless_TMP_C<-Roll_meas_TMP_C[dum_TTM5yless==1,.(wavemean_Roll_DCtrades_TTM5yless=weighted.mean(Roll,bondday_totvol,na.rm = TRUE),
                                                                    wavemean_BPW_DCtrades_TTM5yless=weighted.mean(BPW,bondday_totvol,na.rm = TRUE)),
                                                 by=.(trd_exctn_dt)]
  
  tmp_Roll_meas_TTM5ymore_TMP_C<-Roll_meas_TMP_C[dum_TTM5ymore==1,.(wavemean_Roll_DCtrades_TTM5ymore=weighted.mean(Roll,bondday_totvol,na.rm = TRUE),
                                                                    wavemean_BPW_DCtrades_TTM5ymore=weighted.mean(BPW,bondday_totvol,na.rm = TRUE)),
                                                 by=.(trd_exctn_dt)]
  
  tmp_Roll_meas_US_TMP_C<-Roll_meas_TMP_C[dum_US==1,.(wavemean_Roll_DCtrades_US=weighted.mean(Roll,bondday_totvol,na.rm = TRUE),
                                                      wavemean_BPW_DCtrades_US=weighted.mean(BPW,bondday_totvol,na.rm = TRUE)),
                                          by=.(trd_exctn_dt)]
  tmp_Roll_meas_nonUS_TMP_C<-Roll_meas_TMP_C[dum_nonUS==1,.(wavemean_Roll_DCtrades_nonUS=weighted.mean(Roll,bondday_totvol,na.rm = TRUE),
                                                            wavemean_BPW_DCtrades_nonUS=weighted.mean(BPW,bondday_totvol,na.rm = TRUE)),
                                             by=.(trd_exctn_dt)]
  tmp_Roll_meas_Finance_TMP_C<-Roll_meas_TMP_C[dum_Finance==1,.(wavemean_Roll_DCtrades_Finance=weighted.mean(Roll,bondday_totvol,na.rm = TRUE),
                                                                wavemean_BPW_DCtrades_Finance=weighted.mean(BPW,bondday_totvol,na.rm = TRUE)),
                                               by=.(trd_exctn_dt)]
  tmp_Roll_meas_OilGas_TMP_C<-Roll_meas_TMP_C[dum_OilGas==1,.(wavemean_Roll_DCtrades_OilGas=weighted.mean(Roll,bondday_totvol,na.rm = TRUE),
                                                              wavemean_BPW_DCtrades_OilGas=weighted.mean(BPW,bondday_totvol,na.rm = TRUE)),
                                              by=.(trd_exctn_dt)]
  tmp_Roll_meas_Transportation_TMP_C<-Roll_meas_TMP_C[dum_Transportation==1,.(wavemean_Roll_DCtrades_Transportation=weighted.mean(Roll,bondday_totvol,na.rm = TRUE),
                                                                              wavemean_BPW_DCtrades_Transportation=weighted.mean(BPW,bondday_totvol,na.rm = TRUE)),
                                                      by=.(trd_exctn_dt)]
  tmp_Roll_meas_Utility_TMP_C<-Roll_meas_TMP_C[dum_Utility==1,.(wavemean_Roll_DCtrades_Utility=weighted.mean(Roll,bondday_totvol,na.rm = TRUE),
                                                                wavemean_BPW_DCtrades_Utility=weighted.mean(BPW,bondday_totvol,na.rm = TRUE)),
                                               by=.(trd_exctn_dt)]
  
  
  ###Calculate Roll using all trades
  TradeData_filter_masterfundamental<-TradeData_filter_masterfundamental[order(cusip_id,new_datetime_exctn)]
  TradeData_filter_masterfundamental[,num_trades_day:=.N,by=.(cusip_id, trd_exctn_dt)]
  TradeData_filter_masterfundamental[,`:=`(shift_rptd_pr=shift(rptd_pr,n=as.integer(num_trades_day[1])-1,type="lead")),by=.(cusip_id,trd_exctn_dt)]
  TradeData_filter_masterfundamental[,`:=`(count_seq=sequence(.N)),by=.(cusip_id,trd_exctn_dt)]
  TradeData_filter_masterfundamental[count_seq==1,`:=`(shift_rptd_pr=NA)]
  setcolorder(TradeData_filter_masterfundamental,c("rptd_pr","shift_rptd_pr"))
  
  TradeData_filter_masterfundamental[num_trades_day>=10,dlogp:=log(rptd_pr)-log(shift_rptd_pr), by=.(cusip_id,trd_exctn_dt)]
  TradeData_filter_masterfundamental[,lag_dlogp:=shift(dlogp,n=as.integer(num_trades_day[1])-1,type="lead"),by=.(cusip_id,trd_exctn_dt)]
  TradeData_filter_masterfundamental[,`:=`(dlogp=as.numeric(dlogp),lag_dlogp=as.numeric(lag_dlogp))]
  TradeData_filter_masterfundamental[count_seq==1,`:=`(lag_dlogp=NA)]
  setcolorder(TradeData_filter_masterfundamental,c("dlogp","lag_dlogp"))
  
  Roll_meas_TradeData_filter_masterfundamental <- TradeData_filter_masterfundamental[!is.na(dlogp) & !is.na(lag_dlogp),`:=`(Roll=-cov(dlogp,lag_dlogp),
                                                                                                                            bondday_totvol=sum(`Quantity(ParVal)_num`,na.rm = TRUE),
                                                                                                                            count_seq=sequence(.N)),
                                                                                     by=c("trd_exctn_dt","cusip_id")]
  
  Roll_meas_TradeData_filter_masterfundamental <- Roll_meas_TradeData_filter_masterfundamental[count_seq==1,
                                                                                               c("cusip_id","trd_exctn_dt","bond_grade",ExtraDum,
                                                                                                 "Roll","bondday_totvol"),with=FALSE]
  
  
  # Roll_meas_TradeData_filter_masterfundamental <- TradeData_filter_masterfundamental[!is.na(dlogp) & !is.na(lag_dlogp),.(Roll=-cov(dlogp,lag_dlogp),
  #                                                              bondday_totvol=sum(`Quantity(ParVal)_num`,na.rm = TRUE)),by=.(trd_exctn_dt,cusip_id)]
  
  Roll_meas_TradeData_filter_masterfundamental <- Roll_meas_TradeData_filter_masterfundamental[!is.na(Roll) & Roll>0]
  Roll_meas_TradeData_filter_masterfundamental[,BPW:=Roll][,Roll:=2*sqrt(Roll)]
  
  tmp_Roll_meas_TradeData_filter_masterfundamental<-Roll_meas_TradeData_filter_masterfundamental[,.(wavemean_Roll_alltrades=weighted.mean(Roll,bondday_totvol,na.rm = TRUE),
                                                                                                    wavemean_BPW_alltrades=weighted.mean(BPW,bondday_totvol,na.rm = TRUE)),
                                                                                                 by=.(trd_exctn_dt)]
  
  tmp_Roll_meas_IG_TradeData_filter_masterfundamental<-Roll_meas_TradeData_filter_masterfundamental[bond_grade=="I",.(wavemean_Roll_alltrades_IG=weighted.mean(Roll,bondday_totvol,na.rm = TRUE),
                                                                                                                      wavemean_BPW_alltrades_IG=weighted.mean(BPW,bondday_totvol,na.rm = TRUE)),
                                                                                                    by=.(trd_exctn_dt)]
  
  tmp_Roll_meas_HY_TradeData_filter_masterfundamental<-Roll_meas_TradeData_filter_masterfundamental[bond_grade=="H",.(wavemean_Roll_alltrades_HY=weighted.mean(Roll,bondday_totvol,na.rm = TRUE),
                                                                                                                      wavemean_BPW_alltrades_HY=weighted.mean(BPW,bondday_totvol,na.rm = TRUE)),
                                                                                                    by=.(trd_exctn_dt)]
  
  tmp_Roll_meas_TTM5yless_TradeData_filter_masterfundamental<-Roll_meas_TradeData_filter_masterfundamental[dum_TTM5yless==1,.(wavemean_Roll_alltrades_TTM5yless=weighted.mean(Roll,bondday_totvol,na.rm = TRUE),
                                                                                                                              wavemean_BPW_alltrades_TTM5yless=weighted.mean(BPW,bondday_totvol,na.rm = TRUE)),
                                                                                                           by=.(trd_exctn_dt)]
  
  tmp_Roll_meas_TTM5ymore_TradeData_filter_masterfundamental<-Roll_meas_TradeData_filter_masterfundamental[dum_TTM5ymore==1,.(wavemean_Roll_alltrades_TTM5ymore=weighted.mean(Roll,bondday_totvol,na.rm = TRUE),
                                                                                                                              wavemean_BPW_alltrades_TTM5ymore=weighted.mean(BPW,bondday_totvol,na.rm = TRUE)),
                                                                                                           by=.(trd_exctn_dt)]
  
  tmp_Roll_meas_US_TradeData_filter_masterfundamental<-Roll_meas_TradeData_filter_masterfundamental[dum_US==1,.(wavemean_Roll_alltrades_US=weighted.mean(Roll,bondday_totvol,na.rm = TRUE),
                                                                                                                wavemean_BPW_alltrades_US=weighted.mean(BPW,bondday_totvol,na.rm = TRUE)),
                                                                                                    by=.(trd_exctn_dt)]
  tmp_Roll_meas_nonUS_TradeData_filter_masterfundamental<-Roll_meas_TradeData_filter_masterfundamental[dum_nonUS==1,.(wavemean_Roll_alltrades_nonUS=weighted.mean(Roll,bondday_totvol,na.rm = TRUE),
                                                                                                                      wavemean_BPW_alltrades_nonUS=weighted.mean(BPW,bondday_totvol,na.rm = TRUE)),
                                                                                                       by=.(trd_exctn_dt)]
  tmp_Roll_meas_Finance_TradeData_filter_masterfundamental<-Roll_meas_TradeData_filter_masterfundamental[dum_Finance==1,.(wavemean_Roll_alltrades_Finance=weighted.mean(Roll,bondday_totvol,na.rm = TRUE),
                                                                                                                          wavemean_BPW_alltrades_Finance=weighted.mean(BPW,bondday_totvol,na.rm = TRUE)),
                                                                                                         by=.(trd_exctn_dt)]
  tmp_Roll_meas_OilGas_TradeData_filter_masterfundamental<-Roll_meas_TradeData_filter_masterfundamental[dum_OilGas==1,.(wavemean_Roll_alltrades_OilGas=weighted.mean(Roll,bondday_totvol,na.rm = TRUE),
                                                                                                                        wavemean_BPW_alltrades_OilGas=weighted.mean(BPW,bondday_totvol,na.rm = TRUE)),
                                                                                                        by=.(trd_exctn_dt)]
  tmp_Roll_meas_Transportation_TradeData_filter_masterfundamental<-Roll_meas_TradeData_filter_masterfundamental[dum_Transportation==1,.(wavemean_Roll_alltrades_Transportation=weighted.mean(Roll,bondday_totvol,na.rm = TRUE),
                                                                                                                                        wavemean_BPW_alltrades_Transportation=weighted.mean(BPW,bondday_totvol,na.rm = TRUE)),
                                                                                                                by=.(trd_exctn_dt)]
  tmp_Roll_meas_Utility_TradeData_filter_masterfundamental<-Roll_meas_TradeData_filter_masterfundamental[dum_Utility==1,.(wavemean_Roll_alltrades_Utility=weighted.mean(Roll,bondday_totvol,na.rm = TRUE),
                                                                                                                          wavemean_BPW_alltrades_Utility=weighted.mean(BPW,bondday_totvol,na.rm = TRUE)),
                                                                                                         by=.(trd_exctn_dt)]
  
  ######################################################################
  # get the average volume of customer-bought and customer-sold trades #
  ######################################################################
  ###For transaction volume related variables, need to use TradeData_filter_masterfundamental_allcapacity
  tmp6 <- TradeData_filter_masterfundamental_allcapacity[,.(daily_vol_alltrades = sum(`Quantity(ParVal)_num`,na.rm=TRUE)/1e9, 
                                                daily_num_alltrades = sum(!is.na(`Quantity(ParVal)_num`)),
                                                daily_vol_customer_bought = sum(dum_dealer_to_customer*`Quantity(ParVal)_num`,na.rm = TRUE)/1e9,
                                                daily_num_customer_bought = sum(dum_dealer_to_customer,na.rm = TRUE),
                                                daily_vol_customer_sold = sum(dum_customer_to_dealer*`Quantity(ParVal)_num`,na.rm = TRUE)/1e9,
                                                daily_num_customer_sold = sum(dum_customer_to_dealer,na.rm = TRUE),
                                                daily_vol_interdealer = sum(dum_interdealer*`Quantity(ParVal)_num`,na.rm = TRUE)/1e9,
                                                daily_num_interdealer= sum(dum_interdealer,na.rm = TRUE),
                                                daily_avg_vol_customer_sold = sum(dum_customer_to_dealer*`Quantity(ParVal)_num`,na.rm = TRUE)/1e9/sum(dum_customer_to_dealer,na.rm = TRUE),
                                                daily_avg_vol_customer_bought = sum(dum_dealer_to_customer*`Quantity(ParVal)_num`,na.rm = TRUE)/1e9/sum(dum_dealer_to_customer,na.rm = TRUE),
                                                daily_avg_vol_interdealer = sum(dum_interdealer*`Quantity(ParVal)_num`,na.rm = TRUE)/1e9/sum(dum_interdealer,na.rm = TRUE)), 
                                             by=c("trd_exctn_dt")]
  
  tmp6[,
       `:=`(daily_netinflow_vol=daily_vol_customer_sold-daily_vol_customer_bought)]
  
  TradeData_filter_masterfundamental[,`:=`(count_seq=sequence(.N)),
                                     by=c("cusip_id","mtrty_dt","trd_exctn_dt")]
  TradeData_filter_masterfundamental[,`:=`(dum_IG=0,dum_HY=0,dum_NR=0)]
  TradeData_filter_masterfundamental[bond_grade=="I",`:=`(dum_IG=1)]
  TradeData_filter_masterfundamental[bond_grade=="H",`:=`(dum_HY=1)]
  TradeData_filter_masterfundamental[bond_grade=="NR",`:=`(dum_NR=1)]
  
  TradeData_filter_masterfundamental_allcapacity[,`:=`(count_seq=sequence(.N)),
                                     by=c("cusip_id","mtrty_dt","trd_exctn_dt")]
  TradeData_filter_masterfundamental_allcapacity[,`:=`(dum_IG=0,dum_HY=0,dum_NR=0)]
  TradeData_filter_masterfundamental_allcapacity[bond_grade=="I",`:=`(dum_IG=1)]
  TradeData_filter_masterfundamental_allcapacity[bond_grade=="H",`:=`(dum_HY=1)]
  TradeData_filter_masterfundamental_allcapacity[bond_grade=="NR",`:=`(dum_NR=1)]
  
  TradeData_filter_masterfundamental_bondday[,`:=`(dum_IG=0,dum_HY=0,dum_NR=0)]
  TradeData_filter_masterfundamental_bondday[bond_grade=="I",`:=`(dum_IG=1)]
  TradeData_filter_masterfundamental_bondday[bond_grade=="H",`:=`(dum_HY=1)]
  TradeData_filter_masterfundamental_bondday[bond_grade=="NR",`:=`(dum_NR=1)]
  
  tmp6_amtoutstanding<-TradeData_filter_masterfundamental_allcapacity[count_seq==1, c("cusip_id","mtrty_dt","trd_exctn_dt","amountoutstanding_thousands","dum_IG","dum_HY",
                                                                          "dum_TTM5yless","dum_TTM5ymore",
                                                                          ExtraDum),
                                                          with=FALSE]
  print(summary(tmp6_amtoutstanding))
  tmp6_amtoutstanding[,`:=`(daily_amountoutstanding_all=sum(amountoutstanding_thousands,na.rm = TRUE)*1000/1e9,
                            daily_amountoutstanding_IG=sum(dum_IG*amountoutstanding_thousands,na.rm = TRUE)*1000/1e9,
                            daily_amountoutstanding_HY=sum(dum_HY*amountoutstanding_thousands,na.rm = TRUE)*1000/1e9,
                            daily_amountoutstanding_TTM5yless=sum(dum_TTM5yless*amountoutstanding_thousands,na.rm = TRUE)*1000/1e9,
                            daily_amountoutstanding_TTM5ymore=sum(dum_TTM5ymore*amountoutstanding_thousands,na.rm = TRUE)*1000/1e9,
                            daily_amountoutstanding_IG_TTM5yless=sum(dum_IG*dum_TTM5yless*amountoutstanding_thousands,na.rm = TRUE)*1000/1e9,
                            daily_amountoutstanding_HY_TTM5yless=sum(dum_HY*dum_TTM5yless*amountoutstanding_thousands,na.rm = TRUE)*1000/1e9,
                            daily_amountoutstanding_IG_TTM5ymore=sum(dum_IG*dum_TTM5ymore*amountoutstanding_thousands,na.rm = TRUE)*1000/1e9,
                            daily_amountoutstanding_HY_TTM5ymore=sum(dum_HY*dum_TTM5ymore*amountoutstanding_thousands,na.rm = TRUE)*1000/1e9,
                            
                            daily_amountoutstanding_US=sum(dum_US*amountoutstanding_thousands,na.rm = TRUE)*1000/1e9,
                            daily_amountoutstanding_nonUS=sum(dum_nonUS*amountoutstanding_thousands,na.rm = TRUE)*1000/1e9,
                            daily_amountoutstanding_Finance=sum(dum_Finance*amountoutstanding_thousands,na.rm = TRUE)*1000/1e9,
                            daily_amountoutstanding_Utility=sum(dum_Utility*amountoutstanding_thousands,na.rm = TRUE)*1000/1e9,
                            daily_amountoutstanding_OilGas=sum(dum_OilGas*amountoutstanding_thousands,na.rm = TRUE)*1000/1e9,
                            daily_amountoutstanding_Transportation=sum(dum_Transportation*amountoutstanding_thousands,na.rm = TRUE)*1000/1e9,
                            
                            count_seq=sequence(.N)),
                      by=c("trd_exctn_dt")]
  tmp6_amtoutstanding<-tmp6_amtoutstanding[count_seq==1,
                                           c("trd_exctn_dt","daily_amountoutstanding_all","daily_amountoutstanding_IG","daily_amountoutstanding_HY",
                                             "daily_amountoutstanding_TTM5yless","daily_amountoutstanding_TTM5ymore",
                                             "daily_amountoutstanding_IG_TTM5yless","daily_amountoutstanding_HY_TTM5yless",
                                             "daily_amountoutstanding_IG_TTM5ymore","daily_amountoutstanding_HY_TTM5ymore",
                                             "daily_amountoutstanding_US","daily_amountoutstanding_nonUS",
                                             "daily_amountoutstanding_Finance","daily_amountoutstanding_Utility",
                                             "daily_amountoutstanding_OilGas","daily_amountoutstanding_Transportation"),
                                           with=FALSE]
  
  tmp7 <- TradeData_filter_masterfundamental_allcapacity[`Quantity(ParVal)_num`>=2000000,
                                             .(daily_vol_alltrades_2m = sum(`Quantity(ParVal)_num`,na.rm=TRUE)/1e9, 
                                               daily_num_alltrades_2m = sum(!is.na(`Quantity(ParVal)_num`)),
                                               daily_vol_customer_bought_2m = sum(dum_dealer_to_customer*`Quantity(ParVal)_num`,na.rm = TRUE)/1e9,
                                               daily_num_customer_bought_2m = sum(dum_dealer_to_customer,na.rm = TRUE),
                                               daily_vol_customer_sold_2m = sum(dum_customer_to_dealer*`Quantity(ParVal)_num`,na.rm = TRUE)/1e9,
                                               daily_num_customer_sold_2m = sum(dum_customer_to_dealer,na.rm = TRUE),
                                               daily_vol_interdealer_2m = sum(dum_interdealer*`Quantity(ParVal)_num`,na.rm = TRUE)/1e9,
                                               daily_num_interdealer_2m = sum(dum_interdealer,na.rm = TRUE),
                                               daily_avg_vol_customer_sold_2m = sum(dum_customer_to_dealer*`Quantity(ParVal)_num`,na.rm = TRUE)/1e9/sum(dum_customer_to_dealer,na.rm = TRUE),
                                               daily_avg_vol_customer_bought_2m = sum(dum_dealer_to_customer*`Quantity(ParVal)_num`,na.rm = TRUE)/1e9/sum(dum_dealer_to_customer,na.rm = TRUE),
                                               daily_avg_vol_interdealer_2m = sum(dum_interdealer*`Quantity(ParVal)_num`,na.rm = TRUE)/1e9/sum(dum_interdealer,na.rm = TRUE)),
                                             by=c("trd_exctn_dt")]
  
  TradeData_filter_masterfundamental_allcapacity[,`:=`(dum_agencytrade=0,
                                           dum_principaltrade=0)]
  TradeData_filter_masterfundamental_allcapacity[capacity=="A",`:=`(dum_agencytrade=1)]
  TradeData_filter_masterfundamental_allcapacity[capacity=="P",`:=`(dum_principaltrade=1)]
  
  # TradeData_filter_masterfundamental[,`:=`(dum_IG=0,
  #                                                               dum_HY=0)]
  # TradeData_filter_masterfundamental[bond_grade=="I",`:=`(dum_IG=1)]
  # TradeData_filter_masterfundamental[bond_grade=="H",`:=`(dum_HY=1)]
  
  print(table(TradeData_filter_masterfundamental$bond_grade))
  print(table(TradeData_filter_masterfundamental$dum_IG))
  print(table(TradeData_filter_masterfundamental$dum_HY))
  print(table(TradeData_filter_masterfundamental_allcapacity$bond_grade))
  print(table(TradeData_filter_masterfundamental_allcapacity$dum_IG))
  print(table(TradeData_filter_masterfundamental_allcapacity$dum_HY))
  print(table(TradeData_filter_masterfundamental_allcapacity$capacity))
  print(table(TradeData_filter_masterfundamental_allcapacity$dum_agencytrade))
  print(table(TradeData_filter_masterfundamental_allcapacity$dum_principaltrade))
  
  tmp8 <- TradeData_filter_masterfundamental_allcapacity[,.(daily_num_agencytrade = sum(dum_agencytrade,na.rm = TRUE),
                                                daily_num_principaltrade = sum(dum_principaltrade,na.rm = TRUE),
                                                daily_vol_agencytrade = sum(dum_agencytrade*`Quantity(ParVal)_num`,na.rm = TRUE)/1e9,
                                                daily_vol_principaltrade = sum(dum_principaltrade*`Quantity(ParVal)_num`,na.rm = TRUE)/1e9),
                                             by=c("trd_exctn_dt")]
  
  ###TradeData_filter_masterfundamental and TradeData_filter_masterfundamental_bondday only include principal trades to calculate 
  ###transaction costs
  tmp9 <- TradeData_filter_masterfundamental_bondday[ ,.(mean_spread_CH_bondday_bps=mean(spread_CH_bondday,na.rm = TRUE)/(0.01/100),
                                                         wavemean_spread_CH_bondday_bps=weighted.mean(spread_CH_bondday,bondday_totvol_withnonnanspreadCH,
                                                                                                      na.rm = TRUE)/(0.01/100)), 
                                                      by=c("trd_exctn_dt")]
  
  # new_tmp9<-TradeData_filter_masterfundamental_bondday[ ,.(wavemean_spread_CH_bondday_bps=weighted.mean(spread_CH_bondday,bondday_totvol,
  #                                                                                                   na.rm = TRUE)/(0.01/100)), 
  #                                                       by=c("trd_exctn_dt")]
  
  tmp10 <- TradeData_filter_masterfundamental_bondday[bond_grade=="I",.(mean_spread_CH_bondday_IG_bps=mean(spread_CH_bondday,na.rm = TRUE)/(0.01/100),
                                                                        wavemean_spread_CH_bondday_IG_bps=weighted.mean(spread_CH_bondday,
                                                                                                                        bondday_totvol_withnonnanspreadCH,
                                                                                                                        na.rm = TRUE)/(0.01/100)),
                                                      by=c("trd_exctn_dt")]
  
  # new_tmp10 <- TradeData_filter_masterfundamental_bondday[bond_grade=="I",.(wavemean_spread_CH_bondday_IG_bps=weighted.mean(spread_CH_bondday,
  #                                                                                                                           bondday_totvol,
  #                                                                                                                       na.rm = TRUE)/(0.01/100)),
  #                                                         by=c("trd_exctn_dt")]
  
  tmp10_2 <- TradeData_filter_masterfundamental_bondday[dum_TTM5yless==1,.(mean_spread_CH_bondday_TTM5yless_bps=mean(spread_CH_bondday,na.rm = TRUE)/(0.01/100),
                                                                           wavemean_spread_CH_bondday_TTM5yless_bps=weighted.mean(spread_CH_bondday,
                                                                                                                                  bondday_totvol_withnonnanspreadCH,
                                                                                                                                  na.rm = TRUE)/(0.01/100)),
                                                        by=c("trd_exctn_dt")]
  
  tmp10_3 <- TradeData_filter_masterfundamental_bondday[bond_grade=="I"&dum_TTM5yless==1,.(mean_spread_CH_bondday_IG_TTM5yless_bps=mean(spread_CH_bondday,na.rm = TRUE)/(0.01/100),
                                                                                           wavemean_spread_CH_bondday_IG_TTM5yless_bps=weighted.mean(spread_CH_bondday,
                                                                                                                                                     bondday_totvol_withnonnanspreadCH,
                                                                                                                                                     na.rm = TRUE)/(0.01/100)),
                                                        by=c("trd_exctn_dt")]
  
  tmp10_4 <- TradeData_filter_masterfundamental_bondday[bond_grade=="I"&dum_TTM5ymore==1,.(mean_spread_CH_bondday_IG_TTM5ymore_bps=mean(spread_CH_bondday,na.rm = TRUE)/(0.01/100),
                                                                                           wavemean_spread_CH_bondday_IG_TTM5ymore_bps=weighted.mean(spread_CH_bondday,
                                                                                                                                                     bondday_totvol_withnonnanspreadCH,
                                                                                                                                                     na.rm = TRUE)/(0.01/100)),
                                                        by=c("trd_exctn_dt")]
  
  tmp10_5 <- TradeData_filter_masterfundamental_bondday[bond_grade=="H"&dum_TTM5yless==1,.(mean_spread_CH_bondday_HY_TTM5yless_bps=mean(spread_CH_bondday,na.rm = TRUE)/(0.01/100),
                                                                                           wavemean_spread_CH_bondday_HY_TTM5yless_bps=weighted.mean(spread_CH_bondday,
                                                                                                                                                     bondday_totvol_withnonnanspreadCH,
                                                                                                                                                     na.rm = TRUE)/(0.01/100)),
                                                        by=c("trd_exctn_dt")]
  
  tmp10_6 <- TradeData_filter_masterfundamental_bondday[bond_grade=="H"&dum_TTM5ymore==1,.(mean_spread_CH_bondday_HY_TTM5ymore_bps=mean(spread_CH_bondday,na.rm = TRUE)/(0.01/100),
                                                                                           wavemean_spread_CH_bondday_HY_TTM5ymore_bps=weighted.mean(spread_CH_bondday,
                                                                                                                                                     bondday_totvol_withnonnanspreadCH,
                                                                                                                                                     na.rm = TRUE)/(0.01/100)),
                                                        by=c("trd_exctn_dt")]
  
  
  tmp11 <- TradeData_filter_masterfundamental_bondday[bond_grade=="H",.(mean_spread_CH_bondday_HY_bps=mean(spread_CH_bondday,na.rm = TRUE)/(0.01/100),
                                                                        wavemean_spread_CH_bondday_HY_bps=weighted.mean(spread_CH_bondday,
                                                                                                                        bondday_totvol_withnonnanspreadCH,
                                                                                                                        na.rm = TRUE)/(0.01/100)),
                                                      by=c("trd_exctn_dt")]
  
  # new_tmp11 <- TradeData_filter_masterfundamental_bondday[bond_grade=="H",.(wavemean_spread_CH_bondday_HY_bps=weighted.mean(spread_CH_bondday,
  #                                                                                                                           bondday_totvol_withnonnanspreadCH,
  #                                                                                                                       na.rm = TRUE)/(0.01/100)),
  #                                                         by=c("trd_exctn_dt")]
  
  tmp11_2 <- TradeData_filter_masterfundamental_bondday[dum_TTM5ymore==1,.(mean_spread_CH_bondday_TTM5ymore_bps=mean(spread_CH_bondday,na.rm = TRUE)/(0.01/100),
                                                                           wavemean_spread_CH_bondday_TTM5ymore_bps=weighted.mean(spread_CH_bondday,
                                                                                                                                  bondday_totvol_withnonnanspreadCH,
                                                                                                                                  na.rm = TRUE)/(0.01/100)),
                                                        by=c("trd_exctn_dt")]
  
  tmp11_3 <- TradeData_filter_masterfundamental_bondday[dum_US==1,.(mean_spread_CH_bondday_US_bps=mean(spread_CH_bondday,na.rm = TRUE)/(0.01/100),
                                                                    wavemean_spread_CH_bondday_US_bps=weighted.mean(spread_CH_bondday,
                                                                                                                    bondday_totvol_withnonnanspreadCH,
                                                                                                                    na.rm = TRUE)/(0.01/100)),
                                                        by=c("trd_exctn_dt")]
  tmp11_4 <- TradeData_filter_masterfundamental_bondday[dum_nonUS==1,.(mean_spread_CH_bondday_nonUS_bps=mean(spread_CH_bondday,na.rm = TRUE)/(0.01/100),
                                                                       wavemean_spread_CH_bondday_nonUS_bps=weighted.mean(spread_CH_bondday,
                                                                                                                          bondday_totvol_withnonnanspreadCH,
                                                                                                                          na.rm = TRUE)/(0.01/100)),
                                                        by=c("trd_exctn_dt")]
  tmp11_5 <- TradeData_filter_masterfundamental_bondday[dum_Finance==1,.(mean_spread_CH_bondday_Finance_bps=mean(spread_CH_bondday,na.rm = TRUE)/(0.01/100),
                                                                         wavemean_spread_CH_bondday_Finance_bps=weighted.mean(spread_CH_bondday,
                                                                                                                              bondday_totvol_withnonnanspreadCH,
                                                                                                                              na.rm = TRUE)/(0.01/100)),
                                                        by=c("trd_exctn_dt")]
  tmp11_6 <- TradeData_filter_masterfundamental_bondday[dum_Utility==1,.(mean_spread_CH_bondday_Utility_bps=mean(spread_CH_bondday,na.rm = TRUE)/(0.01/100),
                                                                         wavemean_spread_CH_bondday_Utility_bps=weighted.mean(spread_CH_bondday,
                                                                                                                              bondday_totvol_withnonnanspreadCH,
                                                                                                                              na.rm = TRUE)/(0.01/100)),
                                                        by=c("trd_exctn_dt")]
  tmp11_7 <- TradeData_filter_masterfundamental_bondday[dum_Transportation==1,.(mean_spread_CH_bondday_Transportation_bps=mean(spread_CH_bondday,na.rm = TRUE)/(0.01/100),
                                                                                wavemean_spread_CH_bondday_Transportation_bps=weighted.mean(spread_CH_bondday,
                                                                                                                                            bondday_totvol_withnonnanspreadCH,
                                                                                                                                            na.rm = TRUE)/(0.01/100)),
                                                        by=c("trd_exctn_dt")]
  tmp11_8 <- TradeData_filter_masterfundamental_bondday[dum_OilGas==1,.(mean_spread_CH_bondday_OilGas_bps=mean(spread_CH_bondday,na.rm = TRUE)/(0.01/100),
                                                                        wavemean_spread_CH_bondday_OilGas_bps=weighted.mean(spread_CH_bondday,
                                                                                                                            bondday_totvol_withnonnanspreadCH,
                                                                                                                            na.rm = TRUE)/(0.01/100)),
                                                        by=c("trd_exctn_dt")]
  
  ####################################################
  #calculate for eligible and non-eligible bonds
  tmp11_9 <- TradeData_filter_masterfundamental_bondday[(dum_TTM5yless==1)&
                                                          (bond_grade=="I")&
                                                          (dum_US==1),.(mean_spread_CH_bondday_Eligible_bps=mean(spread_CH_bondday,na.rm = TRUE)/(0.01/100),
                                                                                           wavemean_spread_CH_bondday_Eligible_bps=weighted.mean(spread_CH_bondday,
                                                                                                                                                     bondday_totvol_withnonnanspreadCH,
                                                                                                                                                     na.rm = TRUE)/(0.01/100)),
                                                        by=c("trd_exctn_dt")]
  tmp11_10 <- TradeData_filter_masterfundamental_bondday[!((dum_TTM5yless==1)&
                                                             (bond_grade=="I")&
                                                             (dum_US==1)),.(mean_spread_CH_bondday_NonEligible_bps=mean(spread_CH_bondday,na.rm = TRUE)/(0.01/100),
                                                                                           wavemean_spread_CH_bondday_NonEligible_bps=weighted.mean(spread_CH_bondday,
                                                                                                                                                     bondday_totvol_withnonnanspreadCH,
                                                                                                                                                     na.rm = TRUE)/(0.01/100)),
                                                        by=c("trd_exctn_dt")]
  ####################################################
  
  ###Just change for vol-related variables
  TradeData_filter_masterfundamental_allcapacity[,
                                                 `:=`(dum_Eligible=0)]
  TradeData_filter_masterfundamental_allcapacity[(dum_TTM5yless==1)&
                                                   (bond_grade=="I")&
                                                   (dum_US==1),
                                                 `:=`(dum_Eligible=1)]
  tmp12 <- TradeData_filter_masterfundamental_allcapacity[,.(daily_num_IG = sum(dum_IG,na.rm = TRUE),
                                                 daily_num_HY = sum(dum_HY,na.rm = TRUE),
                                                 daily_num_Eligible = sum(dum_Eligible,na.rm = TRUE),
                                                 
                                                 daily_num_TTM5yless = sum(dum_TTM5yless,na.rm = TRUE),
                                                 daily_num_TTM5ymore = sum(dum_TTM5ymore,na.rm = TRUE),
                                                 
                                                 daily_vol_IG = sum(dum_IG*`Quantity(ParVal)_num`,na.rm = TRUE)/1e9,
                                                 daily_vol_HY = sum(dum_HY*`Quantity(ParVal)_num`,na.rm = TRUE)/1e9,
                                                 daily_vol_Eligible = sum(dum_Eligible*`Quantity(ParVal)_num`,na.rm = TRUE)/1e9,
                                                 
                                                 daily_vol_TTM5yless = sum(dum_TTM5yless*`Quantity(ParVal)_num`,na.rm = TRUE)/1e9,
                                                 daily_vol_TTM5ymore = sum(dum_TTM5ymore*`Quantity(ParVal)_num`,na.rm = TRUE)/1e9,
                                                 
                                                 daily_vol_US = sum(dum_US*`Quantity(ParVal)_num`,na.rm = TRUE)/1e9,
                                                 daily_vol_nonUS = sum(dum_nonUS*`Quantity(ParVal)_num`,na.rm = TRUE)/1e9,
                                                 daily_vol_Finance = sum(dum_Finance*`Quantity(ParVal)_num`,na.rm = TRUE)/1e9,
                                                 daily_vol_Utility = sum(dum_Utility*`Quantity(ParVal)_num`,na.rm = TRUE)/1e9,
                                                 daily_vol_Transportation = sum(dum_Transportation*`Quantity(ParVal)_num`,na.rm = TRUE)/1e9,
                                                 daily_vol_OilGas = sum(dum_OilGas*`Quantity(ParVal)_num`,na.rm = TRUE)/1e9,
                                                 
                                                 daily_num_agencytrade_IG = sum(dum_agencytrade*dum_IG,na.rm = TRUE),
                                                 daily_num_principaltrade_IG = sum(dum_principaltrade*dum_IG,na.rm = TRUE),
                                                 daily_vol_agencytrade_IG = sum(dum_agencytrade*`Quantity(ParVal)_num`*dum_IG,na.rm = TRUE)/1e9,
                                                 daily_vol_principaltrade_IG = sum(dum_principaltrade*`Quantity(ParVal)_num`*dum_IG,na.rm = TRUE)/1e9,
                                                 
                                                 daily_num_agencytrade_US = sum(dum_agencytrade*dum_US,na.rm = TRUE),
                                                 daily_num_principaltrade_US = sum(dum_principaltrade*dum_US,na.rm = TRUE),
                                                 daily_vol_agencytrade_US = sum(dum_agencytrade*`Quantity(ParVal)_num`*dum_US,na.rm = TRUE)/1e9,
                                                 daily_vol_principaltrade_US = sum(dum_principaltrade*`Quantity(ParVal)_num`*dum_US,na.rm = TRUE)/1e9,
                                                 daily_num_agencytrade_nonUS = sum(dum_agencytrade*dum_nonUS,na.rm = TRUE),
                                                 daily_num_principaltrade_nonUS = sum(dum_principaltrade*dum_nonUS,na.rm = TRUE),
                                                 daily_vol_agencytrade_nonUS = sum(dum_agencytrade*`Quantity(ParVal)_num`*dum_nonUS,na.rm = TRUE)/1e9,
                                                 daily_vol_principaltrade_nonUS = sum(dum_principaltrade*`Quantity(ParVal)_num`*dum_nonUS,na.rm = TRUE)/1e9,
                                                 daily_num_agencytrade_Finance = sum(dum_agencytrade*dum_Finance,na.rm = TRUE),
                                                 daily_num_principaltrade_Finance = sum(dum_principaltrade*dum_Finance,na.rm = TRUE),
                                                 daily_vol_agencytrade_Finance = sum(dum_agencytrade*`Quantity(ParVal)_num`*dum_Finance,na.rm = TRUE)/1e9,
                                                 daily_vol_principaltrade_Finance = sum(dum_principaltrade*`Quantity(ParVal)_num`*dum_Finance,na.rm = TRUE)/1e9,
                                                 daily_num_agencytrade_Utility = sum(dum_agencytrade*dum_Utility,na.rm = TRUE),
                                                 daily_num_principaltrade_Utility = sum(dum_principaltrade*dum_Utility,na.rm = TRUE),
                                                 daily_vol_agencytrade_Utility = sum(dum_agencytrade*`Quantity(ParVal)_num`*dum_Utility,na.rm = TRUE)/1e9,
                                                 daily_vol_principaltrade_Utility = sum(dum_principaltrade*`Quantity(ParVal)_num`*dum_Utility,na.rm = TRUE)/1e9,
                                                 daily_num_agencytrade_Transportation = sum(dum_agencytrade*dum_Transportation,na.rm = TRUE),
                                                 daily_num_principaltrade_Transportation = sum(dum_principaltrade*dum_Transportation,na.rm = TRUE),
                                                 daily_vol_agencytrade_Transportation = sum(dum_agencytrade*`Quantity(ParVal)_num`*dum_Transportation,na.rm = TRUE)/1e9,
                                                 daily_vol_principaltrade_Transportation = sum(dum_principaltrade*`Quantity(ParVal)_num`*dum_Transportation,na.rm = TRUE)/1e9,
                                                 daily_num_agencytrade_OilGas = sum(dum_agencytrade*dum_OilGas,na.rm = TRUE),
                                                 daily_num_principaltrade_OilGas = sum(dum_principaltrade*dum_OilGas,na.rm = TRUE),
                                                 daily_vol_agencytrade_OilGas = sum(dum_agencytrade*`Quantity(ParVal)_num`*dum_OilGas,na.rm = TRUE)/1e9,
                                                 daily_vol_principaltrade_OilGas = sum(dum_principaltrade*`Quantity(ParVal)_num`*dum_OilGas,na.rm = TRUE)/1e9,
                                                 
                                                 
                                                 daily_num_agencytrade_TTM5yless = sum(dum_agencytrade*dum_TTM5yless,na.rm = TRUE),
                                                 daily_num_principaltrade_TTM5yless = sum(dum_principaltrade*dum_TTM5yless,na.rm = TRUE),
                                                 daily_vol_agencytrade_TTM5yless = sum(dum_agencytrade*`Quantity(ParVal)_num`*dum_TTM5yless,na.rm = TRUE)/1e9,
                                                 daily_vol_principaltrade_TTM5yless = sum(dum_principaltrade*`Quantity(ParVal)_num`*dum_TTM5yless,na.rm = TRUE)/1e9,
                                                 
                                                 daily_vol_agencytrade_IG_TTM5yless = sum(dum_agencytrade*`Quantity(ParVal)_num`*dum_IG*dum_TTM5yless,na.rm = TRUE)/1e9,
                                                 daily_vol_principaltrade_IG_TTM5yless = sum(dum_principaltrade*`Quantity(ParVal)_num`*dum_IG*dum_TTM5yless,na.rm = TRUE)/1e9,
                                                 daily_vol_agencytrade_IG_TTM5ymore = sum(dum_agencytrade*`Quantity(ParVal)_num`*dum_IG*dum_TTM5ymore,na.rm = TRUE)/1e9,
                                                 daily_vol_principaltrade_IG_TTM5ymore = sum(dum_principaltrade*`Quantity(ParVal)_num`*dum_IG*dum_TTM5ymore,na.rm = TRUE)/1e9,
                                                 daily_vol_agencytrade_HY_TTM5yless = sum(dum_agencytrade*`Quantity(ParVal)_num`*dum_HY*dum_TTM5yless,na.rm = TRUE)/1e9,
                                                 daily_vol_principaltrade_HY_TTM5yless = sum(dum_principaltrade*`Quantity(ParVal)_num`*dum_HY*dum_TTM5yless,na.rm = TRUE)/1e9,
                                                 daily_vol_agencytrade_HY_TTM5ymore = sum(dum_agencytrade*`Quantity(ParVal)_num`*dum_HY*dum_TTM5ymore,na.rm = TRUE)/1e9,
                                                 daily_vol_principaltrade_HY_TTM5ymore = sum(dum_principaltrade*`Quantity(ParVal)_num`*dum_HY*dum_TTM5ymore,na.rm = TRUE)/1e9,
                                                 
                                                 daily_num_agencytrade_HY = sum(dum_agencytrade*dum_HY,na.rm = TRUE),
                                                 daily_num_principaltrade_HY = sum(dum_principaltrade*dum_HY,na.rm = TRUE),
                                                 daily_vol_agencytrade_HY = sum(dum_agencytrade*`Quantity(ParVal)_num`*dum_HY,na.rm = TRUE)/1e9,
                                                 daily_vol_principaltrade_HY = sum(dum_principaltrade*`Quantity(ParVal)_num`*dum_HY,na.rm = TRUE)/1e9,
                                                 
                                                 daily_num_agencytrade_TTM5ymore = sum(dum_agencytrade*dum_TTM5ymore,na.rm = TRUE),
                                                 daily_num_principaltrade_TTM5ymore = sum(dum_principaltrade*dum_TTM5ymore,na.rm = TRUE),
                                                 daily_vol_agencytrade_TTM5ymore = sum(dum_agencytrade*`Quantity(ParVal)_num`*dum_TTM5ymore,na.rm = TRUE)/1e9,
                                                 daily_vol_principaltrade_TTM5ymore = sum(dum_principaltrade*`Quantity(ParVal)_num`*dum_TTM5ymore,na.rm = TRUE)/1e9,
                                                 
                                                 
                                                 daily_vol_customer_bought_IG = sum(dum_dealer_to_customer*`Quantity(ParVal)_num`*dum_IG,na.rm = TRUE)/1e9,
                                                 daily_num_customer_bought_IG = sum(dum_dealer_to_customer*dum_IG,na.rm = TRUE),
                                                 daily_vol_customer_sold_IG = sum(dum_customer_to_dealer*`Quantity(ParVal)_num`*dum_IG,na.rm = TRUE)/1e9,
                                                 daily_num_customer_sold_IG = sum(dum_customer_to_dealer*dum_IG,na.rm = TRUE),
                                                 daily_vol_interdealer_IG = sum(dum_interdealer*`Quantity(ParVal)_num`*dum_IG,na.rm = TRUE)/1e9,
                                                 daily_num_interdealer_IG= sum(dum_interdealer*dum_IG,na.rm = TRUE),
                                                 daily_avg_vol_customer_sold_IG = sum(dum_customer_to_dealer*`Quantity(ParVal)_num`*dum_IG,na.rm = TRUE)/1e9/sum(dum_customer_to_dealer*dum_IG,na.rm = TRUE),
                                                 daily_avg_vol_customer_bought_IG = sum(dum_dealer_to_customer*`Quantity(ParVal)_num`*dum_IG,na.rm = TRUE)/1e9/sum(dum_dealer_to_customer*dum_IG,na.rm = TRUE),
                                                 daily_avg_vol_interdealer_IG = sum(dum_interdealer*`Quantity(ParVal)_num`*dum_IG,na.rm = TRUE)/1e9/sum(dum_interdealer*dum_IG,na.rm = TRUE),
                                                 
                                                 daily_vol_customer_bought_TTM5yless = sum(dum_dealer_to_customer*`Quantity(ParVal)_num`*dum_TTM5yless,na.rm = TRUE)/1e9,
                                                 daily_num_customer_bought_TTM5yless = sum(dum_dealer_to_customer*dum_TTM5yless,na.rm = TRUE),
                                                 daily_vol_customer_sold_TTM5yless = sum(dum_customer_to_dealer*`Quantity(ParVal)_num`*dum_TTM5yless,na.rm = TRUE)/1e9,
                                                 daily_num_customer_sold_TTM5yless = sum(dum_customer_to_dealer*dum_TTM5yless,na.rm = TRUE),
                                                 daily_vol_interdealer_TTM5yless = sum(dum_interdealer*`Quantity(ParVal)_num`*dum_TTM5yless,na.rm = TRUE)/1e9,
                                                 daily_num_interdealer_TTM5yless= sum(dum_interdealer*dum_TTM5yless,na.rm = TRUE),
                                                 daily_avg_vol_customer_sold_TTM5yless = sum(dum_customer_to_dealer*`Quantity(ParVal)_num`*dum_TTM5yless,na.rm = TRUE)/1e9/sum(dum_customer_to_dealer*dum_TTM5yless,na.rm = TRUE),
                                                 daily_avg_vol_customer_bought_TTM5yless = sum(dum_dealer_to_customer*`Quantity(ParVal)_num`*dum_TTM5yless,na.rm = TRUE)/1e9/sum(dum_dealer_to_customer*dum_TTM5yless,na.rm = TRUE),
                                                 daily_avg_vol_interdealer_TTM5yless = sum(dum_interdealer*`Quantity(ParVal)_num`*dum_TTM5yless,na.rm = TRUE)/1e9/sum(dum_interdealer*dum_TTM5yless,na.rm = TRUE),
                                                 
                                                 daily_vol_customer_bought_IG_TTM5yless = sum(dum_dealer_to_customer*`Quantity(ParVal)_num`*dum_IG*dum_TTM5yless,na.rm = TRUE)/1e9,
                                                 daily_vol_customer_sold_IG_TTM5yless = sum(dum_customer_to_dealer*`Quantity(ParVal)_num`*dum_IG*dum_TTM5yless,na.rm = TRUE)/1e9,
                                                 daily_vol_customer_bought_IG_TTM5ymore = sum(dum_dealer_to_customer*`Quantity(ParVal)_num`*dum_IG*dum_TTM5ymore,na.rm = TRUE)/1e9,
                                                 daily_vol_customer_sold_IG_TTM5ymore = sum(dum_customer_to_dealer*`Quantity(ParVal)_num`*dum_IG*dum_TTM5ymore,na.rm = TRUE)/1e9,
                                                 daily_vol_customer_bought_HY_TTM5yless = sum(dum_dealer_to_customer*`Quantity(ParVal)_num`*dum_HY*dum_TTM5yless,na.rm = TRUE)/1e9,
                                                 daily_vol_customer_sold_HY_TTM5yless = sum(dum_customer_to_dealer*`Quantity(ParVal)_num`*dum_HY*dum_TTM5yless,na.rm = TRUE)/1e9,
                                                 daily_vol_customer_bought_HY_TTM5ymore = sum(dum_dealer_to_customer*`Quantity(ParVal)_num`*dum_HY*dum_TTM5ymore,na.rm = TRUE)/1e9,
                                                 daily_vol_customer_sold_HY_TTM5ymore = sum(dum_customer_to_dealer*`Quantity(ParVal)_num`*dum_HY*dum_TTM5ymore,na.rm = TRUE)/1e9,
                                                 
                                                 daily_vol_customer_bought_US = sum(dum_dealer_to_customer*`Quantity(ParVal)_num`*dum_US,na.rm = TRUE)/1e9,
                                                 daily_vol_customer_sold_US = sum(dum_customer_to_dealer*`Quantity(ParVal)_num`*dum_US,na.rm = TRUE)/1e9,
                                                 daily_vol_customer_bought_nonUS = sum(dum_dealer_to_customer*`Quantity(ParVal)_num`*dum_nonUS,na.rm = TRUE)/1e9,
                                                 daily_vol_customer_sold_nonUS = sum(dum_customer_to_dealer*`Quantity(ParVal)_num`*dum_nonUS,na.rm = TRUE)/1e9,
                                                 daily_vol_customer_bought_Finance = sum(dum_dealer_to_customer*`Quantity(ParVal)_num`*dum_Finance,na.rm = TRUE)/1e9,
                                                 daily_vol_customer_sold_Finance = sum(dum_customer_to_dealer*`Quantity(ParVal)_num`*dum_Finance,na.rm = TRUE)/1e9,
                                                 daily_vol_customer_bought_Utility = sum(dum_dealer_to_customer*`Quantity(ParVal)_num`*dum_Utility,na.rm = TRUE)/1e9,
                                                 daily_vol_customer_sold_Utility = sum(dum_customer_to_dealer*`Quantity(ParVal)_num`*dum_Utility,na.rm = TRUE)/1e9,
                                                 daily_vol_customer_bought_Transportation = sum(dum_dealer_to_customer*`Quantity(ParVal)_num`*dum_Transportation,na.rm = TRUE)/1e9,
                                                 daily_vol_customer_sold_Transportation = sum(dum_customer_to_dealer*`Quantity(ParVal)_num`*dum_Transportation,na.rm = TRUE)/1e9,
                                                 daily_vol_customer_bought_OilGas = sum(dum_dealer_to_customer*`Quantity(ParVal)_num`*dum_OilGas,na.rm = TRUE)/1e9,
                                                 daily_vol_customer_sold_OilGas = sum(dum_customer_to_dealer*`Quantity(ParVal)_num`*dum_OilGas,na.rm = TRUE)/1e9,
                                                 
                                                 
                                                 daily_vol_customer_bought_HY = sum(dum_dealer_to_customer*`Quantity(ParVal)_num`*dum_HY,na.rm = TRUE)/1e9,
                                                 daily_num_customer_bought_HY = sum(dum_dealer_to_customer*dum_HY,na.rm = TRUE),
                                                 daily_vol_customer_sold_HY = sum(dum_customer_to_dealer*`Quantity(ParVal)_num`*dum_HY,na.rm = TRUE)/1e9,
                                                 daily_num_customer_sold_HY = sum(dum_customer_to_dealer*dum_HY,na.rm = TRUE),
                                                 daily_vol_interdealer_HY = sum(dum_interdealer*`Quantity(ParVal)_num`*dum_HY,na.rm = TRUE)/1e9,
                                                 daily_num_interdealer_HY= sum(dum_interdealer*dum_HY,na.rm = TRUE),
                                                 daily_avg_vol_customer_sold_HY = sum(dum_customer_to_dealer*`Quantity(ParVal)_num`*dum_HY,na.rm = TRUE)/1e9/sum(dum_customer_to_dealer*dum_HY,na.rm = TRUE),
                                                 daily_avg_vol_customer_bought_HY = sum(dum_dealer_to_customer*`Quantity(ParVal)_num`*dum_HY,na.rm = TRUE)/1e9/sum(dum_dealer_to_customer*dum_HY,na.rm = TRUE),
                                                 daily_avg_vol_interdealer_HY = sum(dum_interdealer*`Quantity(ParVal)_num`*dum_HY,na.rm = TRUE)/1e9/sum(dum_interdealer*dum_HY,na.rm = TRUE),
                                                 
                                                 daily_vol_customer_bought_TTM5ymore = sum(dum_dealer_to_customer*`Quantity(ParVal)_num`*dum_TTM5ymore,na.rm = TRUE)/1e9,
                                                 daily_num_customer_bought_TTM5ymore = sum(dum_dealer_to_customer*dum_TTM5ymore,na.rm = TRUE),
                                                 daily_vol_customer_sold_TTM5ymore = sum(dum_customer_to_dealer*`Quantity(ParVal)_num`*dum_TTM5ymore,na.rm = TRUE)/1e9,
                                                 daily_num_customer_sold_TTM5ymore = sum(dum_customer_to_dealer*dum_TTM5ymore,na.rm = TRUE),
                                                 daily_vol_interdealer_TTM5ymore = sum(dum_interdealer*`Quantity(ParVal)_num`*dum_TTM5ymore,na.rm = TRUE)/1e9,
                                                 daily_num_interdealer_TTM5ymore= sum(dum_interdealer*dum_TTM5ymore,na.rm = TRUE),
                                                 daily_avg_vol_customer_sold_TTM5ymore = sum(dum_customer_to_dealer*`Quantity(ParVal)_num`*dum_TTM5ymore,na.rm = TRUE)/1e9/sum(dum_customer_to_dealer*dum_TTM5ymore,na.rm = TRUE),
                                                 daily_avg_vol_customer_bought_TTM5ymore = sum(dum_dealer_to_customer*`Quantity(ParVal)_num`*dum_TTM5ymore,na.rm = TRUE)/1e9/sum(dum_dealer_to_customer*dum_TTM5ymore,na.rm = TRUE),
                                                 daily_avg_vol_interdealer_TTM5ymore = sum(dum_interdealer*`Quantity(ParVal)_num`*dum_TTM5ymore,na.rm = TRUE)/1e9/sum(dum_interdealer*dum_TTM5ymore,na.rm = TRUE),
                                                 
                                                 daily_vol_customer_bought_Eligible = sum(dum_dealer_to_customer*`Quantity(ParVal)_num`*dum_Eligible,na.rm = TRUE)/1e9,
                                                 daily_vol_customer_sold_Eligible = sum(dum_customer_to_dealer*`Quantity(ParVal)_num`*dum_Eligible,na.rm = TRUE)/1e9),
                                              by=c("trd_exctn_dt")]
  
  tmp12[,
        `:=`(daily_netinflow_vol_IG=daily_vol_customer_sold_IG-daily_vol_customer_bought_IG,
             daily_netinflow_vol_HY=daily_vol_customer_sold_HY-daily_vol_customer_bought_HY,
             daily_netinflow_vol_TTM5yless=daily_vol_customer_sold_TTM5yless-daily_vol_customer_bought_TTM5yless,
             daily_netinflow_vol_TTM5ymore=daily_vol_customer_sold_TTM5ymore-daily_vol_customer_bought_TTM5ymore,
             daily_netinflow_vol_IG_TTM5yless=daily_vol_customer_sold_IG_TTM5yless-daily_vol_customer_bought_IG_TTM5yless,
             daily_netinflow_vol_HY_TTM5yless=daily_vol_customer_sold_HY_TTM5yless-daily_vol_customer_bought_HY_TTM5yless,
             daily_netinflow_vol_IG_TTM5ymore=daily_vol_customer_sold_IG_TTM5ymore-daily_vol_customer_bought_IG_TTM5ymore,
             daily_netinflow_vol_HY_TTM5ymore=daily_vol_customer_sold_HY_TTM5ymore-daily_vol_customer_bought_HY_TTM5ymore,
             
             daily_netinflow_vol_US=daily_vol_customer_sold_US-daily_vol_customer_bought_US,
             daily_netinflow_vol_nonUS=daily_vol_customer_sold_nonUS-daily_vol_customer_bought_nonUS,
             daily_netinflow_vol_Finance=daily_vol_customer_sold_Finance-daily_vol_customer_bought_Finance,
             daily_netinflow_vol_Utility=daily_vol_customer_sold_Utility-daily_vol_customer_bought_Utility,
             daily_netinflow_vol_Transportation=daily_vol_customer_sold_Transportation-daily_vol_customer_bought_Transportation,
             daily_netinflow_vol_OilGas=daily_vol_customer_sold_OilGas-daily_vol_customer_bought_OilGas,
             
             daily_netinflow_vol_Eligible=daily_vol_customer_sold_Eligible-daily_vol_customer_bought_Eligible)]
  
  
  TEMP_DATA<-TradeData_filter_masterfundamental_allcapacity[,c("cusip_id","trd_exctn_dt","dum_IG","dum_HY",
                                                   ExtraDum),
                                                with=FALSE]
  TEMP_DATA[,`:=`(count_seq=sequence(.N)),
            by=c("cusip_id","trd_exctn_dt")]
  TEMP_DATA<-TEMP_DATA[count_seq==1]
  
  tmp13 <- TEMP_DATA[,.(daily_numbonds_IG = sum(dum_IG,na.rm = TRUE),
                        daily_numbonds_HY = sum(dum_HY,na.rm = TRUE),
                        daily_numbonds_OilGas = sum(dum_OilGas,na.rm = TRUE),
                        daily_numbonds_Utility = sum(dum_Utility,na.rm = TRUE),
                        daily_numbonds_Finance = sum(dum_Finance,na.rm = TRUE),
                        daily_numbonds_Transportation = sum(dum_Transportation,na.rm = TRUE),
                        daily_numbonds_US = sum(dum_US,na.rm = TRUE),
                        daily_numbonds_nonUS = sum(dum_nonUS,na.rm = TRUE),
                        daily_numbonds = .N,
                        daily_numbonds_TTM5yless = sum(dum_TTM5yless,na.rm = TRUE),
                        daily_numbonds_TTM5ymore = sum(dum_TTM5ymore,na.rm = TRUE)),
                     by=c("trd_exctn_dt")]
  ###number of capped transactions
  # tmp14 <- TradeData_filter_masterfundamental[ ,.(daily_num_5MM_all = sum(`dum_Quantity(ParVal)_5MM`,na.rm = TRUE)), 
  #                                              by=c("trd_exctn_dt")]
  
  tmp15 <- TradeData_filter_masterfundamental_allcapacity[bond_grade=="I",.(daily_num_5MM_IG = sum(`dum_Quantity(ParVal)_5MM`,na.rm = TRUE),
                                                                daily_vol_5MM_IG = sum(`dum_Quantity(ParVal)_5MM`*`Quantity(ParVal)_num`,na.rm = TRUE)),
                                              by=c("trd_exctn_dt")]
  
  tmp16 <- TradeData_filter_masterfundamental_allcapacity[bond_grade=="H",.(daily_num_1MM_HY = sum(`dum_Quantity(ParVal)_1MM`,na.rm = TRUE),
                                                                daily_vol_1MM_HY = sum(`dum_Quantity(ParVal)_1MM`*`Quantity(ParVal)_num`,na.rm = TRUE)),
                                              by=c("trd_exctn_dt")]
  
  # tmp15_2 <- TradeData_filter_masterfundamental[dum_TTM5yless==1,.(daily_num_5MM_TTM5yless = sum(`dum_Quantity(ParVal)_5MM`,na.rm = TRUE)),
  #                                               by=c("trd_exctn_dt")]
  # tmp16_2 <- TradeData_filter_masterfundamental[dum_TTM5ymore==1,.(daily_num_5MM_TTM5ymore = sum(`dum_Quantity(ParVal)_5MM`,na.rm = TRUE)),
  #                                               by=c("trd_exctn_dt")]
  # 
  # tmp17 <- TradeData_filter_masterfundamental[ ,.(daily_num_1MM_all = sum(`dum_Quantity(ParVal)_1MM`,na.rm = TRUE)), 
  #                                              by=c("trd_exctn_dt")]
  # 
  # tmp18 <- TradeData_filter_masterfundamental[bond_grade=="I",.(daily_num_1MM_IG = sum(`dum_Quantity(ParVal)_1MM`,na.rm = TRUE)),
  #                                             by=c("trd_exctn_dt")]
  # tmp19 <- TradeData_filter_masterfundamental[bond_grade=="H",.(daily_num_1MM_HY = sum(`dum_Quantity(ParVal)_1MM`,na.rm = TRUE)),
  #                                             by=c("trd_exctn_dt")]
  # 
  # tmp18_2 <- TradeData_filter_masterfundamental[dum_TTM5yless==1,.(daily_num_1MM_TTM5yless = sum(`dum_Quantity(ParVal)_1MM`,na.rm = TRUE)),
  #                                               by=c("trd_exctn_dt")]
  # tmp19_2 <- TradeData_filter_masterfundamental[dum_TTM5ymore==1,.(daily_num_1MM_TTM5ymore = sum(`dum_Quantity(ParVal)_1MM`,na.rm = TRUE)),
  #                                               by=c("trd_exctn_dt")]
  
  tmp20 <- TradeData_filter_masterfundamental_allcapacity[,
                                              .(daily_num_5MM_IG_C2D=sum(`dum_Quantity(ParVal)_5MM`*dum_IG*dum_customer_to_dealer,na.rm = TRUE),
                                                daily_vol_5MM_IG_C2D=sum(`dum_Quantity(ParVal)_5MM`*dum_IG*dum_customer_to_dealer*`Quantity(ParVal)_num`,na.rm = TRUE),
                                                daily_num_5MM_IG_D2C=sum(`dum_Quantity(ParVal)_5MM`*dum_IG*dum_dealer_to_customer,na.rm = TRUE),
                                                daily_vol_5MM_IG_D2C=sum(`dum_Quantity(ParVal)_5MM`*dum_IG*dum_dealer_to_customer*`Quantity(ParVal)_num`,na.rm = TRUE),
                                                
                                                daily_num_1MM_HY_C2D=sum(`dum_Quantity(ParVal)_1MM`*dum_HY*dum_customer_to_dealer,na.rm = TRUE),
                                                daily_vol_1MM_HY_C2D=sum(`dum_Quantity(ParVal)_1MM`*dum_HY*dum_customer_to_dealer*`Quantity(ParVal)_num`,na.rm = TRUE),
                                                daily_num_1MM_HY_D2C=sum(`dum_Quantity(ParVal)_1MM`*dum_HY*dum_dealer_to_customer,na.rm = TRUE),
                                                daily_vol_1MM_HY_D2C=sum(`dum_Quantity(ParVal)_1MM`*dum_HY*dum_dealer_to_customer*`Quantity(ParVal)_num`,na.rm = TRUE)),
                                              by=c("trd_exctn_dt")]
  
  # tmp21 <- TradeData_filter_masterfundamental[,
  #                                             .(daily_numtrades_customersold_IG_above5MM=sum(`dum_Quantity(ParVal)_5MM`*dum_IG*dum_customer_to_dealer,na.rm = TRUE),
  #                                               daily_numtrades_customerbought_IG_above5MM=sum(`dum_Quantity(ParVal)_5MM`*dum_IG*dum_dealer_to_customer,na.rm = TRUE),
  #                                               daily_numtrades_customersold_IG_below5MM=sum((1-`dum_Quantity(ParVal)_5MM`)*dum_IG*dum_customer_to_dealer,na.rm = TRUE),
  #                                               daily_numtrades_customerbought_IG_below5MM=sum((1-`dum_Quantity(ParVal)_5MM`)*dum_IG*dum_dealer_to_customer,na.rm = TRUE),
  #                                               
  #                                               
  #                                               daily_numtrades_customersold_HY_above1MM=sum(`dum_Quantity(ParVal)_1MM`*dum_HY*dum_customer_to_dealer,na.rm = TRUE),
  #                                               daily_numtrades_customerbought_HY_above1MM=sum(`dum_Quantity(ParVal)_1MM`*dum_HY*dum_dealer_to_customer,na.rm = TRUE),
  #                                               daily_numtrades_customersold_HY_below1MM=sum((1-`dum_Quantity(ParVal)_1MM`)*dum_HY*dum_customer_to_dealer,na.rm = TRUE),
  #                                               daily_numtrades_customerbought_HY_below1MM=sum((1-`dum_Quantity(ParVal)_1MM`)*dum_HY*dum_dealer_to_customer,na.rm = TRUE)),
  #                                             by=c("trd_exctn_dt")]
  
  
  tmp_cv_CustBuy <- TradeData_filter_masterfundamental_allcapacity[dum_dealer_to_customer==1,
                                                       .(cv_vol_customer_bought = 
                                                           sd(`Quantity(ParVal)_num`,na.rm = TRUE)/mean(`Quantity(ParVal)_num`,na.rm = TRUE),
                                                         sdlog_vol_custmer_bought = sd(log(`Quantity(ParVal)_num`),na.rm = TRUE),
                                                         vol_p5_custmer_bought=quantile(`Quantity(ParVal)_num`,probs = 0.05,na.rm = TRUE),
                                                         vol_p10_custmer_bought=quantile(`Quantity(ParVal)_num`,probs = 0.10,na.rm = TRUE),
                                                         vol_p25_custmer_bought=quantile(`Quantity(ParVal)_num`,probs = 0.25,na.rm = TRUE),
                                                         vol_p50_custmer_bought=quantile(`Quantity(ParVal)_num`,probs = 0.50,na.rm = TRUE),
                                                         vol_p75_custmer_bought=quantile(`Quantity(ParVal)_num`,probs = 0.75,na.rm = TRUE),
                                                         vol_p90_custmer_bought=quantile(`Quantity(ParVal)_num`,probs = 0.90,na.rm = TRUE),
                                                         vol_p95_custmer_bought=quantile(`Quantity(ParVal)_num`,probs = 0.95,na.rm = TRUE),
                                                         vol_p99_custmer_bought=quantile(`Quantity(ParVal)_num`,probs = 0.99,na.rm = TRUE),
                                                         vol_p100_custmer_bought=quantile(`Quantity(ParVal)_num`,probs = 1,na.rm = TRUE)),
                                                       by=c("trd_exctn_dt")]
  
  
  tmp_cv_CustSell <- TradeData_filter_masterfundamental_allcapacity[dum_customer_to_dealer==1,
                                                        .(cv_vol_customer_sold = sd(`Quantity(ParVal)_num`,na.rm = TRUE)/mean(`Quantity(ParVal)_num`,na.rm = TRUE),
                                                          sdlog_vol_custmer_sold = sd(log(`Quantity(ParVal)_num`),na.rm = TRUE),
                                                          vol_p5_custmer_sold=quantile(`Quantity(ParVal)_num`,probs = 0.05,na.rm = TRUE),
                                                          vol_p10_custmer_sold=quantile(`Quantity(ParVal)_num`,probs = 0.10,na.rm = TRUE),
                                                          vol_p25_custmer_sold=quantile(`Quantity(ParVal)_num`,probs = 0.25,na.rm = TRUE),
                                                          vol_p50_custmer_sold=quantile(`Quantity(ParVal)_num`,probs = 0.50,na.rm = TRUE),
                                                          vol_p75_custmer_sold=quantile(`Quantity(ParVal)_num`,probs = 0.75,na.rm = TRUE),
                                                          vol_p90_custmer_sold=quantile(`Quantity(ParVal)_num`,probs = 0.90,na.rm = TRUE),
                                                          vol_p95_custmer_sold=quantile(`Quantity(ParVal)_num`,probs = 0.95,na.rm = TRUE),
                                                          vol_p99_custmer_sold=quantile(`Quantity(ParVal)_num`,probs = 0.99,na.rm = TRUE),
                                                          vol_p100_custmer_sold=quantile(`Quantity(ParVal)_num`,probs = 1,na.rm = TRUE)),
                                                        by=c("trd_exctn_dt")]
  
  
  tmp_cv_Interdealer <- TradeData_filter_masterfundamental_allcapacity[dum_interdealer==1,
                                                           .(cv_vol_interdealer = 
                                                               sd(`Quantity(ParVal)_num`,na.rm = TRUE)/mean(`Quantity(ParVal)_num`,na.rm = TRUE),
                                                             sdlog_vol_interdealer = sd(log(`Quantity(ParVal)_num`),na.rm = TRUE),
                                                             vol_p5_interdealer=quantile(`Quantity(ParVal)_num`,probs = 0.05,na.rm = TRUE),
                                                             vol_p10_interdealer=quantile(`Quantity(ParVal)_num`,probs = 0.10,na.rm = TRUE),
                                                             vol_p25_interdealer=quantile(`Quantity(ParVal)_num`,probs = 0.25,na.rm = TRUE),
                                                             vol_p50_interdealer=quantile(`Quantity(ParVal)_num`,probs = 0.50,na.rm = TRUE),
                                                             vol_p75_interdealer=quantile(`Quantity(ParVal)_num`,probs = 0.75,na.rm = TRUE),
                                                             vol_p90_interdealer=quantile(`Quantity(ParVal)_num`,probs = 0.90,na.rm = TRUE),
                                                             vol_p95_interdealer=quantile(`Quantity(ParVal)_num`,probs = 0.95,na.rm = TRUE),
                                                             vol_p99_interdealer=quantile(`Quantity(ParVal)_num`,probs = 0.99,na.rm = TRUE),
                                                             vol_p100_interdealer=quantile(`Quantity(ParVal)_num`,probs = 1,na.rm = TRUE)),
                                                           by=c("trd_exctn_dt")]
  
  tmp <- left_join(tmp_irc_all_date,tmp_irc_IG_date,by=c("trd_exctn_dt"))
  for (new_tmp in c(setdiff(ls(pattern = "^tmp"),c("tmp","tmp_irc_all_date","tmp_irc_IG_date","tmp_irc_cusip_date_vol")))){
    tmp <- left_join(tmp, get(new_tmp), by=c("trd_exctn_dt"))
  }
  
  tmp<-as.data.table(tmp)
  tmp<-tmp[order(trd_exctn_dt)]
  
  tmp[,`:=`(prop_agency_num=daily_num_agencytrade/(daily_num_agencytrade+daily_num_principaltrade),
            prop_agency_vol=daily_vol_agencytrade/(daily_vol_agencytrade+daily_vol_principaltrade),
            
            prop_agency_num_IG=daily_num_agencytrade_IG/(daily_num_agencytrade_IG+daily_num_principaltrade_IG),
            prop_agency_vol_IG=daily_vol_agencytrade_IG/(daily_vol_agencytrade_IG+daily_vol_principaltrade_IG),
            
            prop_agency_num_US=daily_num_agencytrade_US/(daily_num_agencytrade_US+daily_num_principaltrade_US),
            prop_agency_vol_US=daily_vol_agencytrade_US/(daily_vol_agencytrade_US+daily_vol_principaltrade_US),
            prop_agency_num_nonUS=daily_num_agencytrade_nonUS/(daily_num_agencytrade_nonUS+daily_num_principaltrade_nonUS),
            prop_agency_vol_nonUS=daily_vol_agencytrade_nonUS/(daily_vol_agencytrade_nonUS+daily_vol_principaltrade_nonUS),
            prop_agency_num_Finance=daily_num_agencytrade_Finance/(daily_num_agencytrade_Finance+daily_num_principaltrade_Finance),
            prop_agency_vol_Finance=daily_vol_agencytrade_Finance/(daily_vol_agencytrade_Finance+daily_vol_principaltrade_Finance),
            prop_agency_num_Utility=daily_num_agencytrade_Utility/(daily_num_agencytrade_Utility+daily_num_principaltrade_Utility),
            prop_agency_vol_Utility=daily_vol_agencytrade_Utility/(daily_vol_agencytrade_Utility+daily_vol_principaltrade_Utility),
            prop_agency_num_Transportation=daily_num_agencytrade_Transportation/(daily_num_agencytrade_Transportation+daily_num_principaltrade_Transportation),
            prop_agency_vol_Transportation=daily_vol_agencytrade_Transportation/(daily_vol_agencytrade_Transportation+daily_vol_principaltrade_Transportation),
            prop_agency_num_OilGas=daily_num_agencytrade_OilGas/(daily_num_agencytrade_OilGas+daily_num_principaltrade_OilGas),
            prop_agency_vol_OilGas=daily_vol_agencytrade_OilGas/(daily_vol_agencytrade_OilGas+daily_vol_principaltrade_OilGas),
            
            
            prop_agency_num_HY=daily_num_agencytrade_HY/(daily_num_agencytrade_HY+daily_num_principaltrade_HY),
            prop_agency_vol_HY=daily_vol_agencytrade_HY/(daily_vol_agencytrade_HY+daily_vol_principaltrade_HY),
            
            prop_agency_num_TTM5yless=daily_num_agencytrade_TTM5yless/(daily_num_agencytrade_TTM5yless+daily_num_principaltrade_TTM5yless),
            prop_agency_vol_TTM5yless=daily_vol_agencytrade_TTM5yless/(daily_vol_agencytrade_TTM5yless+daily_vol_principaltrade_TTM5yless),
            
            prop_agency_num_TTM5ymore=daily_num_agencytrade_TTM5ymore/(daily_num_agencytrade_TTM5ymore+daily_num_principaltrade_TTM5ymore),
            prop_agency_vol_TTM5ymore=daily_vol_agencytrade_TTM5ymore/(daily_vol_agencytrade_TTM5ymore+daily_vol_principaltrade_TTM5ymore),
            
            prop_agency_vol_IG_TTM5yless=daily_vol_agencytrade_IG_TTM5yless/(daily_vol_agencytrade_IG_TTM5yless+daily_vol_principaltrade_IG_TTM5yless),
            prop_agency_vol_IG_TTM5ymore=daily_vol_agencytrade_IG_TTM5ymore/(daily_vol_agencytrade_IG_TTM5ymore+daily_vol_principaltrade_IG_TTM5ymore),
            prop_agency_vol_HY_TTM5yless=daily_vol_agencytrade_HY_TTM5yless/(daily_vol_agencytrade_HY_TTM5yless+daily_vol_principaltrade_HY_TTM5yless),
            prop_agency_vol_HY_TTM5ymore=daily_vol_agencytrade_HY_TTM5ymore/(daily_vol_agencytrade_HY_TTM5ymore+daily_vol_principaltrade_HY_TTM5ymore))]
  
  tmp<-as.data.table(tmp)
  tmp[,`:=`(cumulative_inven_change_all=cumsum(daily_netinflow_vol),
            cumulative_inven_change_IG=cumsum(daily_netinflow_vol_IG),
            cumulative_inven_change_HY=cumsum(daily_netinflow_vol_HY),
            cumulative_inven_change_TTM5yless=cumsum(daily_netinflow_vol_TTM5yless),
            cumulative_inven_change_TTM5ymore=cumsum(daily_netinflow_vol_TTM5ymore),
            
            cumulative_inven_change_US=cumsum(daily_netinflow_vol_US),
            cumulative_inven_change_nonUS=cumsum(daily_netinflow_vol_nonUS),
            cumulative_inven_change_Finance=cumsum(daily_netinflow_vol_Finance),
            cumulative_inven_change_Utility=cumsum(daily_netinflow_vol_Utility),
            cumulative_inven_change_Transportation=cumsum(daily_netinflow_vol_Transportation),
            cumulative_inven_change_OilGas=cumsum(daily_netinflow_vol_OilGas),
            
            cumulative_inven_change_IG_TTM5yless=cumsum(daily_netinflow_vol_IG_TTM5yless),
            cumulative_inven_change_IG_TTM5ymore=cumsum(daily_netinflow_vol_IG_TTM5ymore),
            cumulative_inven_change_HY_TTM5yless=cumsum(daily_netinflow_vol_HY_TTM5yless),
            cumulative_inven_change_HY_TTM5ymore=cumsum(daily_netinflow_vol_HY_TTM5ymore),
            
            cumulative_inven_change_Eligible=cumsum(daily_netinflow_vol_Eligible))]
  
  fwrite(tmp,
         paste0("SumStat_",E,"_standard_TRACE.csv"))

} 
  
  